diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,29587 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "global_step": 4927, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 6.7567567567567575e-06, + "loss": 8.3828, + "step": 1 + }, + { + "epoch": 0.0, + "learning_rate": 1.3513513513513515e-05, + "loss": 8.4141, + "step": 2 + }, + { + "epoch": 0.0, + "learning_rate": 2.0270270270270273e-05, + "loss": 8.0742, + "step": 3 + }, + { + "epoch": 0.0, + "learning_rate": 2.702702702702703e-05, + "loss": 6.5039, + "step": 4 + }, + { + "epoch": 0.0, + "learning_rate": 3.3783783783783784e-05, + "loss": 5.9336, + "step": 5 + }, + { + "epoch": 0.0, + "learning_rate": 4.0540540540540545e-05, + "loss": 5.7773, + "step": 6 + }, + { + "epoch": 0.0, + "learning_rate": 4.72972972972973e-05, + "loss": 5.5625, + "step": 7 + }, + { + "epoch": 0.0, + "learning_rate": 5.405405405405406e-05, + "loss": 5.418, + "step": 8 + }, + { + "epoch": 0.0, + "learning_rate": 6.0810810810810814e-05, + "loss": 5.3477, + "step": 9 + }, + { + "epoch": 0.0, + "learning_rate": 6.756756756756757e-05, + "loss": 5.2227, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 7.432432432432433e-05, + "loss": 5.1172, + "step": 11 + }, + { + "epoch": 0.0, + "learning_rate": 8.108108108108109e-05, + "loss": 5.0039, + "step": 12 + }, + { + "epoch": 0.0, + "learning_rate": 8.783783783783784e-05, + "loss": 4.8867, + "step": 13 + }, + { + "epoch": 0.0, + "learning_rate": 9.45945945945946e-05, + "loss": 4.793, + "step": 14 + }, + { + "epoch": 0.0, + "learning_rate": 0.00010135135135135136, + "loss": 4.7617, + "step": 15 + }, + { + "epoch": 0.0, + "learning_rate": 0.00010810810810810812, + "loss": 4.668, + "step": 16 + }, + { + "epoch": 0.0, + "learning_rate": 0.00011486486486486487, + "loss": 4.5, + "step": 17 + }, + { + "epoch": 0.0, + "learning_rate": 0.00012162162162162163, + "loss": 4.5703, + "step": 18 + }, + { + "epoch": 0.0, + "learning_rate": 0.00012837837837837836, + "loss": 4.4297, + "step": 19 + }, + { + "epoch": 0.0, + "learning_rate": 0.00013513513513513514, + "loss": 4.3594, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 0.00014189189189189188, + "loss": 4.3672, + "step": 21 + }, + { + "epoch": 0.0, + "learning_rate": 0.00014864864864864866, + "loss": 4.2012, + "step": 22 + }, + { + "epoch": 0.0, + "learning_rate": 0.0001554054054054054, + "loss": 4.1562, + "step": 23 + }, + { + "epoch": 0.0, + "learning_rate": 0.00016216216216216218, + "loss": 4.082, + "step": 24 + }, + { + "epoch": 0.01, + "learning_rate": 0.00016891891891891893, + "loss": 4.0234, + "step": 25 + }, + { + "epoch": 0.01, + "learning_rate": 0.00017567567567567568, + "loss": 3.9902, + "step": 26 + }, + { + "epoch": 0.01, + "learning_rate": 0.00018243243243243242, + "loss": 3.8652, + "step": 27 + }, + { + "epoch": 0.01, + "learning_rate": 0.0001891891891891892, + "loss": 3.8164, + "step": 28 + }, + { + "epoch": 0.01, + "learning_rate": 0.00019594594594594594, + "loss": 3.6934, + "step": 29 + }, + { + "epoch": 0.01, + "learning_rate": 0.00020270270270270272, + "loss": 3.5957, + "step": 30 + }, + { + "epoch": 0.01, + "learning_rate": 0.00020945945945945947, + "loss": 3.7227, + "step": 31 + }, + { + "epoch": 0.01, + "learning_rate": 0.00021621621621621624, + "loss": 3.5996, + "step": 32 + }, + { + "epoch": 0.01, + "learning_rate": 0.000222972972972973, + "loss": 3.5293, + "step": 33 + }, + { + "epoch": 0.01, + "learning_rate": 0.00022972972972972974, + "loss": 3.5391, + "step": 34 + }, + { + "epoch": 0.01, + "learning_rate": 0.00023648648648648648, + "loss": 3.5, + "step": 35 + }, + { + "epoch": 0.01, + "learning_rate": 0.00024324324324324326, + "loss": 3.2871, + "step": 36 + }, + { + "epoch": 0.01, + "learning_rate": 0.00025, + "loss": 3.4434, + "step": 37 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002567567567567567, + "loss": 3.3789, + "step": 38 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002635135135135135, + "loss": 3.3965, + "step": 39 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002702702702702703, + "loss": 3.3652, + "step": 40 + }, + { + "epoch": 0.01, + "learning_rate": 0.00027702702702702705, + "loss": 3.3418, + "step": 41 + }, + { + "epoch": 0.01, + "learning_rate": 0.00028378378378378377, + "loss": 3.2324, + "step": 42 + }, + { + "epoch": 0.01, + "learning_rate": 0.00029054054054054054, + "loss": 3.1895, + "step": 43 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002972972972972973, + "loss": 3.1895, + "step": 44 + }, + { + "epoch": 0.01, + "learning_rate": 0.00030405405405405404, + "loss": 3.2168, + "step": 45 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003108108108108108, + "loss": 3.1504, + "step": 46 + }, + { + "epoch": 0.01, + "learning_rate": 0.00031756756756756753, + "loss": 3.2188, + "step": 47 + }, + { + "epoch": 0.01, + "learning_rate": 0.00032432432432432436, + "loss": 3.207, + "step": 48 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003310810810810811, + "loss": 3.1289, + "step": 49 + }, + { + "epoch": 0.01, + "learning_rate": 0.00033783783783783786, + "loss": 3.0703, + "step": 50 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003445945945945946, + "loss": 3.0898, + "step": 51 + }, + { + "epoch": 0.01, + "learning_rate": 0.00035135135135135135, + "loss": 3.1211, + "step": 52 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003581081081081081, + "loss": 3.0371, + "step": 53 + }, + { + "epoch": 0.01, + "learning_rate": 0.00036486486486486485, + "loss": 3.1016, + "step": 54 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003716216216216216, + "loss": 3.0625, + "step": 55 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003783783783783784, + "loss": 3.0195, + "step": 56 + }, + { + "epoch": 0.01, + "learning_rate": 0.00038513513513513517, + "loss": 3.0039, + "step": 57 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003918918918918919, + "loss": 2.9277, + "step": 58 + }, + { + "epoch": 0.01, + "learning_rate": 0.00039864864864864866, + "loss": 2.9668, + "step": 59 + }, + { + "epoch": 0.01, + "learning_rate": 0.00040540540540540544, + "loss": 2.998, + "step": 60 + }, + { + "epoch": 0.01, + "learning_rate": 0.00041216216216216216, + "loss": 3.0215, + "step": 61 + }, + { + "epoch": 0.01, + "learning_rate": 0.00041891891891891893, + "loss": 3.084, + "step": 62 + }, + { + "epoch": 0.01, + "learning_rate": 0.00042567567567567565, + "loss": 2.9883, + "step": 63 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004324324324324325, + "loss": 2.8887, + "step": 64 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004391891891891892, + "loss": 3.0293, + "step": 65 + }, + { + "epoch": 0.01, + "learning_rate": 0.000445945945945946, + "loss": 2.8965, + "step": 66 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004527027027027027, + "loss": 2.8457, + "step": 67 + }, + { + "epoch": 0.01, + "learning_rate": 0.00045945945945945947, + "loss": 2.9082, + "step": 68 + }, + { + "epoch": 0.01, + "learning_rate": 0.00046621621621621625, + "loss": 2.9062, + "step": 69 + }, + { + "epoch": 0.01, + "learning_rate": 0.00047297297297297297, + "loss": 2.998, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 0.00047972972972972974, + "loss": 2.918, + "step": 71 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004864864864864865, + "loss": 2.8926, + "step": 72 + }, + { + "epoch": 0.01, + "learning_rate": 0.0004932432432432432, + "loss": 2.8945, + "step": 73 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005, + "loss": 2.8789, + "step": 74 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005067567567567568, + "loss": 2.8145, + "step": 75 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005135135135135135, + "loss": 2.8945, + "step": 76 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005202702702702703, + "loss": 2.8828, + "step": 77 + }, + { + "epoch": 0.02, + "learning_rate": 0.000527027027027027, + "loss": 2.8711, + "step": 78 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005337837837837838, + "loss": 2.8926, + "step": 79 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005405405405405405, + "loss": 2.877, + "step": 80 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005472972972972973, + "loss": 2.9023, + "step": 81 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005540540540540541, + "loss": 2.9434, + "step": 82 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005608108108108109, + "loss": 2.8262, + "step": 83 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005675675675675675, + "loss": 2.916, + "step": 84 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005743243243243243, + "loss": 2.8281, + "step": 85 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005810810810810811, + "loss": 2.832, + "step": 86 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005878378378378379, + "loss": 2.8262, + "step": 87 + }, + { + "epoch": 0.02, + "learning_rate": 0.0005945945945945946, + "loss": 2.7988, + "step": 88 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006013513513513513, + "loss": 2.7969, + "step": 89 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006081081081081081, + "loss": 2.8555, + "step": 90 + }, + { + "epoch": 0.02, + "learning_rate": 0.000614864864864865, + "loss": 2.8887, + "step": 91 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006216216216216216, + "loss": 2.8184, + "step": 92 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006283783783783784, + "loss": 2.8516, + "step": 93 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006351351351351351, + "loss": 2.9199, + "step": 94 + }, + { + "epoch": 0.02, + "learning_rate": 0.000641891891891892, + "loss": 2.8496, + "step": 95 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006486486486486487, + "loss": 2.8203, + "step": 96 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006554054054054054, + "loss": 2.8359, + "step": 97 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006621621621621622, + "loss": 2.8086, + "step": 98 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006689189189189189, + "loss": 2.8613, + "step": 99 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006756756756756757, + "loss": 2.9062, + "step": 100 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006824324324324325, + "loss": 2.8594, + "step": 101 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006891891891891892, + "loss": 2.8477, + "step": 102 + }, + { + "epoch": 0.02, + "learning_rate": 0.0006959459459459459, + "loss": 2.7422, + "step": 103 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007027027027027027, + "loss": 2.8516, + "step": 104 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007094594594594595, + "loss": 2.7891, + "step": 105 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007162162162162163, + "loss": 2.8535, + "step": 106 + }, + { + "epoch": 0.02, + "learning_rate": 0.000722972972972973, + "loss": 2.793, + "step": 107 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007297297297297297, + "loss": 2.7754, + "step": 108 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007364864864864866, + "loss": 2.7773, + "step": 109 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007432432432432432, + "loss": 2.7773, + "step": 110 + }, + { + "epoch": 0.02, + "learning_rate": 0.00075, + "loss": 2.8105, + "step": 111 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007567567567567568, + "loss": 2.7266, + "step": 112 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007635135135135135, + "loss": 2.7441, + "step": 113 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007702702702702703, + "loss": 2.7598, + "step": 114 + }, + { + "epoch": 0.02, + "learning_rate": 0.000777027027027027, + "loss": 2.7578, + "step": 115 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007837837837837838, + "loss": 2.7891, + "step": 116 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007905405405405406, + "loss": 2.7363, + "step": 117 + }, + { + "epoch": 0.02, + "learning_rate": 0.0007972972972972973, + "loss": 2.7109, + "step": 118 + }, + { + "epoch": 0.02, + "learning_rate": 0.0008040540540540541, + "loss": 2.6582, + "step": 119 + }, + { + "epoch": 0.02, + "learning_rate": 0.0008108108108108109, + "loss": 2.6191, + "step": 120 + }, + { + "epoch": 0.02, + "learning_rate": 0.0008175675675675675, + "loss": 2.7969, + "step": 121 + }, + { + "epoch": 0.02, + "learning_rate": 0.0008243243243243243, + "loss": 2.6816, + "step": 122 + }, + { + "epoch": 0.02, + "learning_rate": 0.0008310810810810811, + "loss": 2.6738, + "step": 123 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008378378378378379, + "loss": 2.7422, + "step": 124 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008445945945945946, + "loss": 2.7754, + "step": 125 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008513513513513513, + "loss": 2.7754, + "step": 126 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008581081081081081, + "loss": 2.7363, + "step": 127 + }, + { + "epoch": 0.03, + "learning_rate": 0.000864864864864865, + "loss": 2.7129, + "step": 128 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008716216216216216, + "loss": 2.7617, + "step": 129 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008783783783783784, + "loss": 2.6523, + "step": 130 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008851351351351351, + "loss": 2.6895, + "step": 131 + }, + { + "epoch": 0.03, + "learning_rate": 0.000891891891891892, + "loss": 2.7559, + "step": 132 + }, + { + "epoch": 0.03, + "learning_rate": 0.0008986486486486487, + "loss": 2.6621, + "step": 133 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009054054054054054, + "loss": 2.7109, + "step": 134 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009121621621621622, + "loss": 2.7344, + "step": 135 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009189189189189189, + "loss": 2.6172, + "step": 136 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009256756756756757, + "loss": 2.6523, + "step": 137 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009324324324324325, + "loss": 2.6113, + "step": 138 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009391891891891892, + "loss": 2.7227, + "step": 139 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009459459459459459, + "loss": 2.6836, + "step": 140 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009527027027027027, + "loss": 2.6348, + "step": 141 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009594594594594595, + "loss": 2.6953, + "step": 142 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009662162162162163, + "loss": 2.6699, + "step": 143 + }, + { + "epoch": 0.03, + "learning_rate": 0.000972972972972973, + "loss": 2.7266, + "step": 144 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009797297297297297, + "loss": 2.6309, + "step": 145 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009864864864864865, + "loss": 2.6719, + "step": 146 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009932432432432432, + "loss": 2.6602, + "step": 147 + }, + { + "epoch": 0.03, + "learning_rate": 0.001, + "loss": 2.5957, + "step": 148 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999998919647012, + "loss": 2.6289, + "step": 149 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999995678588516, + "loss": 2.6777, + "step": 150 + }, + { + "epoch": 0.03, + "learning_rate": 0.000999999027682591, + "loss": 2.5996, + "step": 151 + }, + { + "epoch": 0.03, + "learning_rate": 0.000999998271436153, + "loss": 2.6289, + "step": 152 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999972991198646, + "loss": 2.6875, + "step": 153 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999961107341458, + "loss": 2.6055, + "step": 154 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999947062795098, + "loss": 2.7168, + "step": 155 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999930857565642, + "loss": 2.6406, + "step": 156 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999912491660088, + "loss": 2.6055, + "step": 157 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999891965086374, + "loss": 2.5625, + "step": 158 + }, + { + "epoch": 0.03, + "learning_rate": 0.000999986927785337, + "loss": 2.6055, + "step": 159 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999844429970884, + "loss": 2.5684, + "step": 160 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999817421449649, + "loss": 2.6328, + "step": 161 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999788252301337, + "loss": 2.5801, + "step": 162 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999756922538553, + "loss": 2.6172, + "step": 163 + }, + { + "epoch": 0.03, + "learning_rate": 0.000999972343217484, + "loss": 2.5938, + "step": 164 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999687781224664, + "loss": 2.5703, + "step": 165 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999649969703436, + "loss": 2.6836, + "step": 166 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999609997627496, + "loss": 2.5938, + "step": 167 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999567865014117, + "loss": 2.5781, + "step": 168 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999523571881503, + "loss": 2.5469, + "step": 169 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999477118248798, + "loss": 2.5684, + "step": 170 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999428504136078, + "loss": 2.6465, + "step": 171 + }, + { + "epoch": 0.03, + "learning_rate": 0.0009999377729564348, + "loss": 2.5332, + "step": 172 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009999324794555552, + "loss": 2.5508, + "step": 173 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999926969913256, + "loss": 2.5645, + "step": 174 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999921244331919, + "loss": 2.5801, + "step": 175 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009999153027140178, + "loss": 2.5137, + "step": 176 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009999091450621203, + "loss": 2.5273, + "step": 177 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009999027713788872, + "loss": 2.5508, + "step": 178 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998961816670732, + "loss": 2.6016, + "step": 179 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998893759295257, + "loss": 2.5508, + "step": 180 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998823541691858, + "loss": 2.5371, + "step": 181 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998751163890882, + "loss": 2.502, + "step": 182 + }, + { + "epoch": 0.04, + "learning_rate": 0.00099986766259236, + "loss": 2.5781, + "step": 183 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999859992782223, + "loss": 2.4941, + "step": 184 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998521069619912, + "loss": 2.4473, + "step": 185 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998440051350724, + "loss": 2.5234, + "step": 186 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998356873049678, + "loss": 2.5625, + "step": 187 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999827153475272, + "loss": 2.4727, + "step": 188 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999818403649673, + "loss": 2.4863, + "step": 189 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009998094378319514, + "loss": 2.5352, + "step": 190 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999800256025982, + "loss": 2.5449, + "step": 191 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009997908582357324, + "loss": 2.6094, + "step": 192 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009997812444652644, + "loss": 2.5039, + "step": 193 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999771414718732, + "loss": 2.5547, + "step": 194 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999761369000383, + "loss": 2.5469, + "step": 195 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009997511073145586, + "loss": 2.5371, + "step": 196 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009997406296656938, + "loss": 2.5039, + "step": 197 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009997299360583157, + "loss": 2.5449, + "step": 198 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999719026497046, + "loss": 2.5, + "step": 199 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009997079009865988, + "loss": 2.459, + "step": 200 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999696559531782, + "loss": 2.4961, + "step": 201 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009996850021374967, + "loss": 2.541, + "step": 202 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009996732288087375, + "loss": 2.5117, + "step": 203 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009996612395505922, + "loss": 2.4629, + "step": 204 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009996490343682413, + "loss": 2.4219, + "step": 205 + }, + { + "epoch": 0.04, + "learning_rate": 0.00099963661326696, + "loss": 2.4922, + "step": 206 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999623976252115, + "loss": 2.5449, + "step": 207 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009996111233291681, + "loss": 2.4512, + "step": 208 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999598054503673, + "loss": 2.5371, + "step": 209 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009995847697812778, + "loss": 2.4766, + "step": 210 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999571269167723, + "loss": 2.4707, + "step": 211 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009995575526688426, + "loss": 2.541, + "step": 212 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009995436202905649, + "loss": 2.4922, + "step": 213 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009995294720389096, + "loss": 2.5156, + "step": 214 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009995151079199916, + "loss": 2.502, + "step": 215 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009995005279400176, + "loss": 2.4258, + "step": 216 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009994857321052885, + "loss": 2.4902, + "step": 217 + }, + { + "epoch": 0.04, + "learning_rate": 0.0009994707204221984, + "loss": 2.4473, + "step": 218 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999455492897234, + "loss": 2.4785, + "step": 219 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999440049536976, + "loss": 2.459, + "step": 220 + }, + { + "epoch": 0.04, + "learning_rate": 0.000999424390348098, + "loss": 2.5645, + "step": 221 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009994085153373674, + "loss": 2.4902, + "step": 222 + }, + { + "epoch": 0.05, + "learning_rate": 0.000999392424511644, + "loss": 2.4883, + "step": 223 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009993761178778812, + "loss": 2.4316, + "step": 224 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009993595954431262, + "loss": 2.5195, + "step": 225 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009993428572145186, + "loss": 2.3906, + "step": 226 + }, + { + "epoch": 0.05, + "learning_rate": 0.000999325903199292, + "loss": 2.4023, + "step": 227 + }, + { + "epoch": 0.05, + "learning_rate": 0.000999308733404773, + "loss": 2.4102, + "step": 228 + }, + { + "epoch": 0.05, + "learning_rate": 0.000999291347838381, + "loss": 2.5625, + "step": 229 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009992737465076293, + "loss": 2.4707, + "step": 230 + }, + { + "epoch": 0.05, + "learning_rate": 0.000999255929420124, + "loss": 2.4961, + "step": 231 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009992378965835646, + "loss": 2.4102, + "step": 232 + }, + { + "epoch": 0.05, + "learning_rate": 0.000999219648005744, + "loss": 2.459, + "step": 233 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009992011836945482, + "loss": 2.4434, + "step": 234 + }, + { + "epoch": 0.05, + "learning_rate": 0.000999182503657956, + "loss": 2.4551, + "step": 235 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009991636079040402, + "loss": 2.3945, + "step": 236 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009991444964409664, + "loss": 2.4277, + "step": 237 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009991251692769933, + "loss": 2.4668, + "step": 238 + }, + { + "epoch": 0.05, + "learning_rate": 0.000999105626420473, + "loss": 2.5176, + "step": 239 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009990858678798507, + "loss": 2.4609, + "step": 240 + }, + { + "epoch": 0.05, + "learning_rate": 0.000999065893663665, + "loss": 2.3457, + "step": 241 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009990457037805476, + "loss": 2.3809, + "step": 242 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009990252982392234, + "loss": 2.4883, + "step": 243 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009990046770485103, + "loss": 2.4648, + "step": 244 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009989838402173197, + "loss": 2.4141, + "step": 245 + }, + { + "epoch": 0.05, + "learning_rate": 0.000998962787754656, + "loss": 2.4395, + "step": 246 + }, + { + "epoch": 0.05, + "learning_rate": 0.000998941519669617, + "loss": 2.4766, + "step": 247 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009989200359713931, + "loss": 2.4668, + "step": 248 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009988983366692689, + "loss": 2.4316, + "step": 249 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009988764217726208, + "loss": 2.3633, + "step": 250 + }, + { + "epoch": 0.05, + "learning_rate": 0.00099885429129092, + "loss": 2.4277, + "step": 251 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009988319452337293, + "loss": 2.4297, + "step": 252 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009988093836107057, + "loss": 2.4238, + "step": 253 + }, + { + "epoch": 0.05, + "learning_rate": 0.000998786606431599, + "loss": 2.4219, + "step": 254 + }, + { + "epoch": 0.05, + "learning_rate": 0.000998763613706252, + "loss": 2.4277, + "step": 255 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009987404054446008, + "loss": 2.3789, + "step": 256 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009987169816566748, + "loss": 2.4551, + "step": 257 + }, + { + "epoch": 0.05, + "learning_rate": 0.000998693342352596, + "loss": 2.4258, + "step": 258 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009986694875425807, + "loss": 2.3887, + "step": 259 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009986454172369369, + "loss": 2.3789, + "step": 260 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009986211314460664, + "loss": 2.3809, + "step": 261 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009985966301804643, + "loss": 2.3555, + "step": 262 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009985719134507185, + "loss": 2.4219, + "step": 263 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009985469812675103, + "loss": 2.4043, + "step": 264 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009985218336416137, + "loss": 2.4082, + "step": 265 + }, + { + "epoch": 0.05, + "learning_rate": 0.000998496470583896, + "loss": 2.459, + "step": 266 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009984708921053178, + "loss": 2.4297, + "step": 267 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009984450982169326, + "loss": 2.4922, + "step": 268 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009984190889298868, + "loss": 2.4043, + "step": 269 + }, + { + "epoch": 0.05, + "learning_rate": 0.0009983928642554203, + "loss": 2.4238, + "step": 270 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009983664242048658, + "loss": 2.4062, + "step": 271 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009983397687896491, + "loss": 2.4199, + "step": 272 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009983128980212892, + "loss": 2.4453, + "step": 273 + }, + { + "epoch": 0.06, + "learning_rate": 0.000998285811911398, + "loss": 2.457, + "step": 274 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009982585104716805, + "loss": 2.4492, + "step": 275 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009982309937139344, + "loss": 2.4512, + "step": 276 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009982032616500517, + "loss": 2.459, + "step": 277 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009981753142920158, + "loss": 2.4121, + "step": 278 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009981471516519044, + "loss": 2.4258, + "step": 279 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009981187737418873, + "loss": 2.4316, + "step": 280 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009980901805742282, + "loss": 2.4824, + "step": 281 + }, + { + "epoch": 0.06, + "learning_rate": 0.000998061372161283, + "loss": 2.4023, + "step": 282 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009980323485155012, + "loss": 2.3711, + "step": 283 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009980031096494253, + "loss": 2.3418, + "step": 284 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009979736555756902, + "loss": 2.3652, + "step": 285 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009979439863070244, + "loss": 2.4023, + "step": 286 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009979141018562495, + "loss": 2.4434, + "step": 287 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009978840022362792, + "loss": 2.4824, + "step": 288 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009978536874601213, + "loss": 2.4648, + "step": 289 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009978231575408759, + "loss": 2.332, + "step": 290 + }, + { + "epoch": 0.06, + "learning_rate": 0.000997792412491736, + "loss": 2.4219, + "step": 291 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009977614523259883, + "loss": 2.334, + "step": 292 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009977302770570115, + "loss": 2.3633, + "step": 293 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009976988866982782, + "loss": 2.4824, + "step": 294 + }, + { + "epoch": 0.06, + "learning_rate": 0.000997667281263353, + "loss": 2.4375, + "step": 295 + }, + { + "epoch": 0.06, + "learning_rate": 0.000997635460765894, + "loss": 2.4434, + "step": 296 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009976034252196524, + "loss": 2.3633, + "step": 297 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009975711746384717, + "loss": 2.334, + "step": 298 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009975387090362892, + "loss": 2.3633, + "step": 299 + }, + { + "epoch": 0.06, + "learning_rate": 0.000997506028427134, + "loss": 2.4043, + "step": 300 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009974731328251294, + "loss": 2.4551, + "step": 301 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009974400222444904, + "loss": 2.4473, + "step": 302 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009974066966995257, + "loss": 2.3496, + "step": 303 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009973731562046366, + "loss": 2.4082, + "step": 304 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009973394007743175, + "loss": 2.3398, + "step": 305 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009973054304231552, + "loss": 2.4727, + "step": 306 + }, + { + "epoch": 0.06, + "learning_rate": 0.00099727124516583, + "loss": 2.4121, + "step": 307 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009972368450171145, + "loss": 2.4414, + "step": 308 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009972022299918745, + "loss": 2.3867, + "step": 309 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009971674001050686, + "loss": 2.4062, + "step": 310 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009971323553717483, + "loss": 2.3535, + "step": 311 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009970970958070576, + "loss": 2.3496, + "step": 312 + }, + { + "epoch": 0.06, + "learning_rate": 0.000997061621426234, + "loss": 2.3711, + "step": 313 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009970259322446072, + "loss": 2.4238, + "step": 314 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009969900282776, + "loss": 2.3379, + "step": 315 + }, + { + "epoch": 0.06, + "learning_rate": 0.000996953909540728, + "loss": 2.3867, + "step": 316 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009969175760495997, + "loss": 2.3867, + "step": 317 + }, + { + "epoch": 0.06, + "learning_rate": 0.000996881027819916, + "loss": 2.375, + "step": 318 + }, + { + "epoch": 0.06, + "learning_rate": 0.0009968442648674713, + "loss": 2.4141, + "step": 319 + }, + { + "epoch": 0.06, + "learning_rate": 0.000996807287208152, + "loss": 2.4316, + "step": 320 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009967700948579378, + "loss": 2.459, + "step": 321 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009967326878329015, + "loss": 2.4336, + "step": 322 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009966950661492073, + "loss": 2.3457, + "step": 323 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009966572298231139, + "loss": 2.3555, + "step": 324 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009966191788709714, + "loss": 2.3379, + "step": 325 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009965809133092236, + "loss": 2.3789, + "step": 326 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009965424331544064, + "loss": 2.3633, + "step": 327 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009965037384231487, + "loss": 2.3398, + "step": 328 + }, + { + "epoch": 0.07, + "learning_rate": 0.000996464829132172, + "loss": 2.3887, + "step": 329 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009964257052982907, + "loss": 2.4199, + "step": 330 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009963863669384119, + "loss": 2.4512, + "step": 331 + }, + { + "epoch": 0.07, + "learning_rate": 0.000996346814069535, + "loss": 2.3711, + "step": 332 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009963070467087528, + "loss": 2.4238, + "step": 333 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009962670648732503, + "loss": 2.3984, + "step": 334 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009962268685803053, + "loss": 2.3594, + "step": 335 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009961864578472883, + "loss": 2.3809, + "step": 336 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009961458326916623, + "loss": 2.3086, + "step": 337 + }, + { + "epoch": 0.07, + "learning_rate": 0.000996104993130983, + "loss": 2.3711, + "step": 338 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009960639391828994, + "loss": 2.3672, + "step": 339 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009960226708651521, + "loss": 2.4043, + "step": 340 + }, + { + "epoch": 0.07, + "learning_rate": 0.000995981188195575, + "loss": 2.3828, + "step": 341 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009959394911920943, + "loss": 2.334, + "step": 342 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009958975798727294, + "loss": 2.3262, + "step": 343 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009958554542555916, + "loss": 2.3984, + "step": 344 + }, + { + "epoch": 0.07, + "learning_rate": 0.000995813114358885, + "loss": 2.3945, + "step": 345 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009957705602009067, + "loss": 2.4023, + "step": 346 + }, + { + "epoch": 0.07, + "learning_rate": 0.000995727791800046, + "loss": 2.3379, + "step": 347 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009956848091747848, + "loss": 2.3613, + "step": 348 + }, + { + "epoch": 0.07, + "learning_rate": 0.000995641612343698, + "loss": 2.4043, + "step": 349 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009955982013254523, + "loss": 2.457, + "step": 350 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009955545761388073, + "loss": 2.3184, + "step": 351 + }, + { + "epoch": 0.07, + "learning_rate": 0.000995510736802616, + "loss": 2.3926, + "step": 352 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009954666833358224, + "loss": 2.4277, + "step": 353 + }, + { + "epoch": 0.07, + "learning_rate": 0.000995422415757464, + "loss": 2.332, + "step": 354 + }, + { + "epoch": 0.07, + "learning_rate": 0.000995377934086671, + "loss": 2.3223, + "step": 355 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009953332383426654, + "loss": 2.3457, + "step": 356 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009952883285447623, + "loss": 2.3887, + "step": 357 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009952432047123688, + "loss": 2.3926, + "step": 358 + }, + { + "epoch": 0.07, + "learning_rate": 0.000995197866864985, + "loss": 2.3809, + "step": 359 + }, + { + "epoch": 0.07, + "learning_rate": 0.000995152315022203, + "loss": 2.3398, + "step": 360 + }, + { + "epoch": 0.07, + "learning_rate": 0.000995106549203708, + "loss": 2.3438, + "step": 361 + }, + { + "epoch": 0.07, + "learning_rate": 0.000995060569429277, + "loss": 2.3848, + "step": 362 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009950143757187799, + "loss": 2.3438, + "step": 363 + }, + { + "epoch": 0.07, + "learning_rate": 0.000994967968092179, + "loss": 2.3008, + "step": 364 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009949213465695287, + "loss": 2.3066, + "step": 365 + }, + { + "epoch": 0.07, + "learning_rate": 0.000994874511170976, + "loss": 2.3418, + "step": 366 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009948274619167607, + "loss": 2.5137, + "step": 367 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009947801988272147, + "loss": 2.2285, + "step": 368 + }, + { + "epoch": 0.07, + "learning_rate": 0.0009947327219227621, + "loss": 2.4102, + "step": 369 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009946850312239199, + "loss": 2.3301, + "step": 370 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009946371267512972, + "loss": 2.3398, + "step": 371 + }, + { + "epoch": 0.08, + "learning_rate": 0.000994589008525595, + "loss": 2.3418, + "step": 372 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009945406765676078, + "loss": 2.3398, + "step": 373 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009944921308982218, + "loss": 2.3066, + "step": 374 + }, + { + "epoch": 0.08, + "learning_rate": 0.000994443371538415, + "loss": 2.291, + "step": 375 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009943943985092588, + "loss": 2.3242, + "step": 376 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009943452118319164, + "loss": 2.2852, + "step": 377 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009942958115276433, + "loss": 2.334, + "step": 378 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009942461976177874, + "loss": 2.3008, + "step": 379 + }, + { + "epoch": 0.08, + "learning_rate": 0.000994196370123789, + "loss": 2.3145, + "step": 380 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009941463290671806, + "loss": 2.3496, + "step": 381 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009940960744695868, + "loss": 2.3184, + "step": 382 + }, + { + "epoch": 0.08, + "learning_rate": 0.000994045606352725, + "loss": 2.3535, + "step": 383 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009939949247384046, + "loss": 2.3281, + "step": 384 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009939440296485268, + "loss": 2.3184, + "step": 385 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009938929211050856, + "loss": 2.3984, + "step": 386 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009938415991301674, + "loss": 2.2988, + "step": 387 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009937900637459502, + "loss": 2.291, + "step": 388 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009937383149747048, + "loss": 2.3164, + "step": 389 + }, + { + "epoch": 0.08, + "learning_rate": 0.000993686352838794, + "loss": 2.2695, + "step": 390 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009936341773606723, + "loss": 2.2578, + "step": 391 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009935817885628874, + "loss": 2.3574, + "step": 392 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009935291864680783, + "loss": 2.3047, + "step": 393 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009934763710989771, + "loss": 2.2793, + "step": 394 + }, + { + "epoch": 0.08, + "learning_rate": 0.000993423342478407, + "loss": 2.3105, + "step": 395 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009933701006292837, + "loss": 2.3262, + "step": 396 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009933166455746158, + "loss": 2.3613, + "step": 397 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009932629773375028, + "loss": 2.2852, + "step": 398 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009932090959411375, + "loss": 2.3262, + "step": 399 + }, + { + "epoch": 0.08, + "learning_rate": 0.000993155001408804, + "loss": 2.3848, + "step": 400 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009931006937638786, + "loss": 2.2871, + "step": 401 + }, + { + "epoch": 0.08, + "learning_rate": 0.00099304617302983, + "loss": 2.375, + "step": 402 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009929914392302192, + "loss": 2.3398, + "step": 403 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009929364923886986, + "loss": 2.2695, + "step": 404 + }, + { + "epoch": 0.08, + "learning_rate": 0.000992881332529013, + "loss": 2.373, + "step": 405 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009928259596749993, + "loss": 2.3828, + "step": 406 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009927703738505861, + "loss": 2.2715, + "step": 407 + }, + { + "epoch": 0.08, + "learning_rate": 0.000992714575079795, + "loss": 2.2832, + "step": 408 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009926585633867385, + "loss": 2.2715, + "step": 409 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009926023387956215, + "loss": 2.3086, + "step": 410 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009925459013307412, + "loss": 2.3242, + "step": 411 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009924892510164863, + "loss": 2.3301, + "step": 412 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009924323878773378, + "loss": 2.3105, + "step": 413 + }, + { + "epoch": 0.08, + "learning_rate": 0.000992375311937869, + "loss": 2.2969, + "step": 414 + }, + { + "epoch": 0.08, + "learning_rate": 0.000992318023222744, + "loss": 2.3613, + "step": 415 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009922605217567203, + "loss": 2.3242, + "step": 416 + }, + { + "epoch": 0.08, + "learning_rate": 0.0009922028075646464, + "loss": 2.3457, + "step": 417 + }, + { + "epoch": 0.08, + "learning_rate": 0.000992144880671463, + "loss": 2.2715, + "step": 418 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009920867411022025, + "loss": 2.3359, + "step": 419 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009920283888819898, + "loss": 2.3457, + "step": 420 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009919698240360409, + "loss": 2.2637, + "step": 421 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009919110465896643, + "loss": 2.2676, + "step": 422 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009918520565682604, + "loss": 2.4023, + "step": 423 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009917928539973206, + "loss": 2.3242, + "step": 424 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009917334389024292, + "loss": 2.3164, + "step": 425 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009916738113092617, + "loss": 2.2578, + "step": 426 + }, + { + "epoch": 0.09, + "learning_rate": 0.000991613971243586, + "loss": 2.3691, + "step": 427 + }, + { + "epoch": 0.09, + "learning_rate": 0.000991553918731261, + "loss": 2.332, + "step": 428 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009914936537982382, + "loss": 2.3691, + "step": 429 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009914331764705605, + "loss": 2.2793, + "step": 430 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009913724867743623, + "loss": 2.2539, + "step": 431 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009913115847358708, + "loss": 2.3457, + "step": 432 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009912504703814037, + "loss": 2.3398, + "step": 433 + }, + { + "epoch": 0.09, + "learning_rate": 0.000991189143737371, + "loss": 2.3164, + "step": 434 + }, + { + "epoch": 0.09, + "learning_rate": 0.000991127604830275, + "loss": 2.3613, + "step": 435 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009910658536867085, + "loss": 2.2715, + "step": 436 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009910038903333575, + "loss": 2.2871, + "step": 437 + }, + { + "epoch": 0.09, + "learning_rate": 0.000990941714796998, + "loss": 2.3203, + "step": 438 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009908793271044996, + "loss": 2.3457, + "step": 439 + }, + { + "epoch": 0.09, + "learning_rate": 0.000990816727282822, + "loss": 2.3418, + "step": 440 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009907539153590173, + "loss": 2.293, + "step": 441 + }, + { + "epoch": 0.09, + "learning_rate": 0.000990690891360229, + "loss": 2.1924, + "step": 442 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009906276553136924, + "loss": 2.3984, + "step": 443 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009905642072467345, + "loss": 2.3496, + "step": 444 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009905005471867738, + "loss": 2.3301, + "step": 445 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009904366751613204, + "loss": 2.3223, + "step": 446 + }, + { + "epoch": 0.09, + "learning_rate": 0.000990372591197976, + "loss": 2.3086, + "step": 447 + }, + { + "epoch": 0.09, + "learning_rate": 0.000990308295324434, + "loss": 2.2871, + "step": 448 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009902437875684794, + "loss": 2.3242, + "step": 449 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009901790679579883, + "loss": 2.3301, + "step": 450 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009901141365209292, + "loss": 2.3281, + "step": 451 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009900489932853612, + "loss": 2.3203, + "step": 452 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009899836382794357, + "loss": 2.332, + "step": 453 + }, + { + "epoch": 0.09, + "learning_rate": 0.000989918071531395, + "loss": 2.3379, + "step": 454 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009898522930695732, + "loss": 2.3008, + "step": 455 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009897863029223962, + "loss": 2.2812, + "step": 456 + }, + { + "epoch": 0.09, + "learning_rate": 0.000989720101118381, + "loss": 2.3066, + "step": 457 + }, + { + "epoch": 0.09, + "learning_rate": 0.000989653687686136, + "loss": 2.3105, + "step": 458 + }, + { + "epoch": 0.09, + "learning_rate": 0.000989587062654361, + "loss": 2.2754, + "step": 459 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009895202260518476, + "loss": 2.3203, + "step": 460 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009894531779074788, + "loss": 2.2871, + "step": 461 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009893859182502286, + "loss": 2.2891, + "step": 462 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009893184471091632, + "loss": 2.3301, + "step": 463 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009892507645134388, + "loss": 2.3301, + "step": 464 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009891828704923044, + "loss": 2.3047, + "step": 465 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009891147650750998, + "loss": 2.334, + "step": 466 + }, + { + "epoch": 0.09, + "learning_rate": 0.000989046448291256, + "loss": 2.293, + "step": 467 + }, + { + "epoch": 0.09, + "learning_rate": 0.0009889779201702955, + "loss": 2.2344, + "step": 468 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009889091807418321, + "loss": 2.2598, + "step": 469 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009888402300355713, + "loss": 2.3066, + "step": 470 + }, + { + "epoch": 0.1, + "learning_rate": 0.000988771068081309, + "loss": 2.3574, + "step": 471 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009887016949089334, + "loss": 2.3359, + "step": 472 + }, + { + "epoch": 0.1, + "learning_rate": 0.000988632110548423, + "loss": 2.3047, + "step": 473 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009885623150298487, + "loss": 2.3027, + "step": 474 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009884923083833715, + "loss": 2.2129, + "step": 475 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009884220906392442, + "loss": 2.2812, + "step": 476 + }, + { + "epoch": 0.1, + "learning_rate": 0.000988351661827811, + "loss": 2.3574, + "step": 477 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009882810219795071, + "loss": 2.3535, + "step": 478 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009882101711248586, + "loss": 2.3652, + "step": 479 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009881391092944835, + "loss": 2.2695, + "step": 480 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009880678365190901, + "loss": 2.2812, + "step": 481 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009879963528294786, + "loss": 2.3555, + "step": 482 + }, + { + "epoch": 0.1, + "learning_rate": 0.00098792465825654, + "loss": 2.3086, + "step": 483 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009878527528312563, + "loss": 2.2207, + "step": 484 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009877806365847011, + "loss": 2.3301, + "step": 485 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009877083095480386, + "loss": 2.2422, + "step": 486 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009876357717525245, + "loss": 2.3301, + "step": 487 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009875630232295048, + "loss": 2.2734, + "step": 488 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009874900640104178, + "loss": 2.2754, + "step": 489 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009874168941267918, + "loss": 2.2441, + "step": 490 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009873435136102467, + "loss": 2.293, + "step": 491 + }, + { + "epoch": 0.1, + "learning_rate": 0.000987269922492493, + "loss": 2.3242, + "step": 492 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009871961208053328, + "loss": 2.2188, + "step": 493 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009871221085806586, + "loss": 2.2441, + "step": 494 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009870478858504544, + "loss": 2.3867, + "step": 495 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009869734526467945, + "loss": 2.3672, + "step": 496 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009868988090018448, + "loss": 2.2422, + "step": 497 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009868239549478618, + "loss": 2.2676, + "step": 498 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009867488905171934, + "loss": 2.2363, + "step": 499 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009866736157422775, + "loss": 2.3379, + "step": 500 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009865981306556436, + "loss": 2.2812, + "step": 501 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009865224352899118, + "loss": 2.3438, + "step": 502 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009864465296777938, + "loss": 2.2598, + "step": 503 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009863704138520907, + "loss": 2.3184, + "step": 504 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009862940878456958, + "loss": 2.2637, + "step": 505 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009862175516915925, + "loss": 2.2715, + "step": 506 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009861408054228555, + "loss": 2.3262, + "step": 507 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009860638490726498, + "loss": 2.291, + "step": 508 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009859866826742313, + "loss": 2.2617, + "step": 509 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009859093062609468, + "loss": 2.3125, + "step": 510 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009858317198662343, + "loss": 2.2949, + "step": 511 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009857539235236215, + "loss": 2.3184, + "step": 512 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009856759172667277, + "loss": 2.291, + "step": 513 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009855977011292625, + "loss": 2.2812, + "step": 514 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009855192751450266, + "loss": 2.2148, + "step": 515 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009854406393479105, + "loss": 2.2383, + "step": 516 + }, + { + "epoch": 0.1, + "learning_rate": 0.0009853617937718967, + "loss": 2.2617, + "step": 517 + }, + { + "epoch": 0.11, + "learning_rate": 0.000985282738451057, + "loss": 2.2559, + "step": 518 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009852034734195547, + "loss": 2.2305, + "step": 519 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009851239987116436, + "loss": 2.373, + "step": 520 + }, + { + "epoch": 0.11, + "learning_rate": 0.000985044314361668, + "loss": 2.2734, + "step": 521 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009849644204040625, + "loss": 2.3047, + "step": 522 + }, + { + "epoch": 0.11, + "learning_rate": 0.000984884316873353, + "loss": 2.2539, + "step": 523 + }, + { + "epoch": 0.11, + "learning_rate": 0.000984804003804155, + "loss": 2.3418, + "step": 524 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009847234812311756, + "loss": 2.4023, + "step": 525 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009846427491892117, + "loss": 2.3867, + "step": 526 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009845618077131508, + "loss": 2.2559, + "step": 527 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009844806568379714, + "loss": 2.2637, + "step": 528 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009843992965987418, + "loss": 2.2734, + "step": 529 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009843177270306212, + "loss": 2.2793, + "step": 530 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009842359481688592, + "loss": 2.3066, + "step": 531 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009841539600487959, + "loss": 2.3555, + "step": 532 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009840717627058616, + "loss": 2.2832, + "step": 533 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009839893561755773, + "loss": 2.2979, + "step": 534 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009839067404935542, + "loss": 2.3535, + "step": 535 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009838239156954938, + "loss": 2.1562, + "step": 536 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009837408818171882, + "loss": 2.3086, + "step": 537 + }, + { + "epoch": 0.11, + "learning_rate": 0.00098365763889452, + "loss": 2.2197, + "step": 538 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009835741869634615, + "loss": 2.3125, + "step": 539 + }, + { + "epoch": 0.11, + "learning_rate": 0.000983490526060076, + "loss": 2.2422, + "step": 540 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009834066562205165, + "loss": 2.3359, + "step": 541 + }, + { + "epoch": 0.11, + "learning_rate": 0.000983322577481027, + "loss": 2.248, + "step": 542 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009832382898779413, + "loss": 2.2363, + "step": 543 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009831537934476835, + "loss": 2.3076, + "step": 544 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009830690882267677, + "loss": 2.2695, + "step": 545 + }, + { + "epoch": 0.11, + "learning_rate": 0.000982984174251799, + "loss": 2.2812, + "step": 546 + }, + { + "epoch": 0.11, + "learning_rate": 0.000982899051559472, + "loss": 2.2734, + "step": 547 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009828137201865715, + "loss": 2.2754, + "step": 548 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009827281801699731, + "loss": 2.2949, + "step": 549 + }, + { + "epoch": 0.11, + "learning_rate": 0.000982642431546642, + "loss": 2.2773, + "step": 550 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009825564743536335, + "loss": 2.2793, + "step": 551 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009824703086280938, + "loss": 2.3086, + "step": 552 + }, + { + "epoch": 0.11, + "learning_rate": 0.000982383934407258, + "loss": 2.2168, + "step": 553 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009822973517284524, + "loss": 2.2568, + "step": 554 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009822105606290927, + "loss": 2.2148, + "step": 555 + }, + { + "epoch": 0.11, + "learning_rate": 0.000982123561146685, + "loss": 2.334, + "step": 556 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009820363533188255, + "loss": 2.3047, + "step": 557 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009819489371832003, + "loss": 2.209, + "step": 558 + }, + { + "epoch": 0.11, + "learning_rate": 0.000981861312777585, + "loss": 2.2812, + "step": 559 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009817734801398463, + "loss": 2.2949, + "step": 560 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009816854393079402, + "loss": 2.3555, + "step": 561 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009815971903199127, + "loss": 2.2852, + "step": 562 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009815087332138996, + "loss": 2.3086, + "step": 563 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009814200680281272, + "loss": 2.252, + "step": 564 + }, + { + "epoch": 0.11, + "learning_rate": 0.0009813311948009109, + "loss": 2.2715, + "step": 565 + }, + { + "epoch": 0.11, + "learning_rate": 0.000981242113570657, + "loss": 2.1299, + "step": 566 + }, + { + "epoch": 0.12, + "learning_rate": 0.000981152824375861, + "loss": 2.2148, + "step": 567 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009810633272551085, + "loss": 2.3496, + "step": 568 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009809736222470748, + "loss": 2.2871, + "step": 569 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009808837093905254, + "loss": 2.3086, + "step": 570 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009807935887243147, + "loss": 2.2559, + "step": 571 + }, + { + "epoch": 0.12, + "learning_rate": 0.000980703260287388, + "loss": 2.2461, + "step": 572 + }, + { + "epoch": 0.12, + "learning_rate": 0.00098061272411878, + "loss": 2.2559, + "step": 573 + }, + { + "epoch": 0.12, + "learning_rate": 0.000980521980257615, + "loss": 2.2949, + "step": 574 + }, + { + "epoch": 0.12, + "learning_rate": 0.000980431028743107, + "loss": 2.2637, + "step": 575 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009803398696145603, + "loss": 2.2129, + "step": 576 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009802485029113681, + "loss": 2.3418, + "step": 577 + }, + { + "epoch": 0.12, + "learning_rate": 0.000980156928673014, + "loss": 2.2285, + "step": 578 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009800651469390705, + "loss": 2.291, + "step": 579 + }, + { + "epoch": 0.12, + "learning_rate": 0.000979973157749201, + "loss": 2.3086, + "step": 580 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009798809611431575, + "loss": 2.1855, + "step": 581 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009797885571607818, + "loss": 2.2227, + "step": 582 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009796959458420057, + "loss": 2.3242, + "step": 583 + }, + { + "epoch": 0.12, + "learning_rate": 0.00097960312722685, + "loss": 2.2676, + "step": 584 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009795101013554258, + "loss": 2.3027, + "step": 585 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009794168682679333, + "loss": 2.2676, + "step": 586 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009793234280046626, + "loss": 2.2188, + "step": 587 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009792297806059927, + "loss": 2.2422, + "step": 588 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009791359261123926, + "loss": 2.334, + "step": 589 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009790418645644209, + "loss": 2.2441, + "step": 590 + }, + { + "epoch": 0.12, + "learning_rate": 0.000978947596002725, + "loss": 2.2812, + "step": 591 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009788531204680429, + "loss": 2.2168, + "step": 592 + }, + { + "epoch": 0.12, + "learning_rate": 0.000978758438001201, + "loss": 2.2422, + "step": 593 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009786635486431152, + "loss": 2.2334, + "step": 594 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009785684524347916, + "loss": 2.1758, + "step": 595 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009784731494173252, + "loss": 2.2969, + "step": 596 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009783776396318999, + "loss": 2.2246, + "step": 597 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009782819231197897, + "loss": 2.2891, + "step": 598 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009781859999223577, + "loss": 2.3086, + "step": 599 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009780898700810562, + "loss": 2.3086, + "step": 600 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009779935336374267, + "loss": 2.2402, + "step": 601 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009778969906331005, + "loss": 2.2617, + "step": 602 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009778002411097975, + "loss": 2.2559, + "step": 603 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009777032851093273, + "loss": 2.2871, + "step": 604 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009776061226735883, + "loss": 2.2217, + "step": 605 + }, + { + "epoch": 0.12, + "learning_rate": 0.000977508753844569, + "loss": 2.2969, + "step": 606 + }, + { + "epoch": 0.12, + "learning_rate": 0.000977411178664346, + "loss": 2.3379, + "step": 607 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009773133971750856, + "loss": 2.4512, + "step": 608 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009772154094190434, + "loss": 2.2822, + "step": 609 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009771172154385637, + "loss": 2.3047, + "step": 610 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009770188152760804, + "loss": 2.2461, + "step": 611 + }, + { + "epoch": 0.12, + "learning_rate": 0.000976920208974116, + "loss": 2.1621, + "step": 612 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009768213965752827, + "loss": 2.3457, + "step": 613 + }, + { + "epoch": 0.12, + "learning_rate": 0.000976722378122281, + "loss": 2.2871, + "step": 614 + }, + { + "epoch": 0.12, + "learning_rate": 0.0009766231536579013, + "loss": 2.2617, + "step": 615 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009765237232250222, + "loss": 2.2559, + "step": 616 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009764240868666119, + "loss": 2.2871, + "step": 617 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009763242446257272, + "loss": 2.2109, + "step": 618 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009762241965455141, + "loss": 2.3438, + "step": 619 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009761239426692076, + "loss": 2.2676, + "step": 620 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009760234830401316, + "loss": 2.2109, + "step": 621 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009759228177016985, + "loss": 2.3457, + "step": 622 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009758219466974104, + "loss": 2.2461, + "step": 623 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009757208700708575, + "loss": 2.2891, + "step": 624 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009756195878657192, + "loss": 2.2441, + "step": 625 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009755181001257637, + "loss": 2.293, + "step": 626 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009754164068948481, + "loss": 2.25, + "step": 627 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009753145082169183, + "loss": 2.3086, + "step": 628 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009752124041360089, + "loss": 2.2227, + "step": 629 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009751100946962432, + "loss": 2.1973, + "step": 630 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009750075799418332, + "loss": 2.2461, + "step": 631 + }, + { + "epoch": 0.13, + "learning_rate": 0.00097490485991708, + "loss": 2.1953, + "step": 632 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009748019346663731, + "loss": 2.1914, + "step": 633 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009746988042341906, + "loss": 2.2656, + "step": 634 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009745954686650997, + "loss": 2.2617, + "step": 635 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009744919280037555, + "loss": 2.2109, + "step": 636 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009743881822949026, + "loss": 2.2812, + "step": 637 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009742842315833735, + "loss": 2.2646, + "step": 638 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009741800759140898, + "loss": 2.25, + "step": 639 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009740757153320614, + "loss": 2.2559, + "step": 640 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009739711498823868, + "loss": 2.2266, + "step": 641 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009738663796102529, + "loss": 2.2158, + "step": 642 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009737614045609355, + "loss": 2.1758, + "step": 643 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009736562247797984, + "loss": 2.2793, + "step": 644 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009735508403122944, + "loss": 2.2402, + "step": 645 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009734452512039642, + "loss": 2.2031, + "step": 646 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009733394575004373, + "loss": 2.2441, + "step": 647 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009732334592474314, + "loss": 2.2637, + "step": 648 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009731272564907532, + "loss": 2.2383, + "step": 649 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009730208492762966, + "loss": 2.293, + "step": 650 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009729142376500449, + "loss": 2.25, + "step": 651 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009728074216580694, + "loss": 2.293, + "step": 652 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009727004013465296, + "loss": 2.1875, + "step": 653 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009725931767616733, + "loss": 2.291, + "step": 654 + }, + { + "epoch": 0.13, + "learning_rate": 0.000972485747949837, + "loss": 2.1426, + "step": 655 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009723781149574449, + "loss": 2.3496, + "step": 656 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009722702778310095, + "loss": 2.2207, + "step": 657 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009721622366171318, + "loss": 2.3145, + "step": 658 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009720539913625008, + "loss": 2.2734, + "step": 659 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009719455421138938, + "loss": 2.2441, + "step": 660 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009718368889181764, + "loss": 2.2598, + "step": 661 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009717280318223018, + "loss": 2.291, + "step": 662 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009716189708733116, + "loss": 2.2031, + "step": 663 + }, + { + "epoch": 0.13, + "learning_rate": 0.0009715097061183357, + "loss": 2.2598, + "step": 664 + }, + { + "epoch": 0.13, + "learning_rate": 0.000971400237604592, + "loss": 2.2344, + "step": 665 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009712905653793862, + "loss": 2.2949, + "step": 666 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009711806894901123, + "loss": 2.1875, + "step": 667 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009710706099842519, + "loss": 2.2285, + "step": 668 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009709603269093752, + "loss": 2.2285, + "step": 669 + }, + { + "epoch": 0.14, + "learning_rate": 0.00097084984031314, + "loss": 2.2266, + "step": 670 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009707391502432921, + "loss": 2.2559, + "step": 671 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009706282567476651, + "loss": 2.2695, + "step": 672 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009705171598741808, + "loss": 2.1973, + "step": 673 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009704058596708488, + "loss": 2.2383, + "step": 674 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009702943561857661, + "loss": 2.2236, + "step": 675 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009701826494671184, + "loss": 2.2344, + "step": 676 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009700707395631787, + "loss": 2.1572, + "step": 677 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009699586265223077, + "loss": 2.2129, + "step": 678 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009698463103929542, + "loss": 2.2402, + "step": 679 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009697337912236546, + "loss": 2.2461, + "step": 680 + }, + { + "epoch": 0.14, + "learning_rate": 0.000969621069063033, + "loss": 2.2637, + "step": 681 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009695081439598014, + "loss": 2.2227, + "step": 682 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009693950159627594, + "loss": 2.2559, + "step": 683 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009692816851207941, + "loss": 2.3496, + "step": 684 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009691681514828806, + "loss": 2.2051, + "step": 685 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009690544150980814, + "loss": 2.2695, + "step": 686 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009689404760155465, + "loss": 2.2773, + "step": 687 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009688263342845139, + "loss": 2.2285, + "step": 688 + }, + { + "epoch": 0.14, + "learning_rate": 0.000968711989954309, + "loss": 2.1621, + "step": 689 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009685974430743445, + "loss": 2.3047, + "step": 690 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009684826936941209, + "loss": 2.2119, + "step": 691 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009683677418632262, + "loss": 2.1865, + "step": 692 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009682525876313357, + "loss": 2.1309, + "step": 693 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009681372310482123, + "loss": 2.2227, + "step": 694 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009680216721637065, + "loss": 2.2344, + "step": 695 + }, + { + "epoch": 0.14, + "learning_rate": 0.000967905911027756, + "loss": 2.1914, + "step": 696 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009677899476903856, + "loss": 2.2734, + "step": 697 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009676737822017083, + "loss": 2.252, + "step": 698 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009675574146119239, + "loss": 2.3047, + "step": 699 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009674408449713193, + "loss": 2.25, + "step": 700 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009673240733302695, + "loss": 2.2344, + "step": 701 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009672070997392361, + "loss": 2.2412, + "step": 702 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009670899242487682, + "loss": 2.2275, + "step": 703 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009669725469095021, + "loss": 2.2051, + "step": 704 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009668549677721616, + "loss": 2.2598, + "step": 705 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009667371868875574, + "loss": 2.2793, + "step": 706 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009666192043065873, + "loss": 2.1738, + "step": 707 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009665010200802367, + "loss": 2.2734, + "step": 708 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009663826342595776, + "loss": 2.2305, + "step": 709 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009662640468957696, + "loss": 2.2568, + "step": 710 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009661452580400591, + "loss": 2.3086, + "step": 711 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009660262677437798, + "loss": 2.2109, + "step": 712 + }, + { + "epoch": 0.14, + "learning_rate": 0.000965907076058352, + "loss": 2.2598, + "step": 713 + }, + { + "epoch": 0.14, + "learning_rate": 0.0009657876830352837, + "loss": 2.2598, + "step": 714 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009656680887261692, + "loss": 2.1895, + "step": 715 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009655482931826905, + "loss": 2.2812, + "step": 716 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009654282964566158, + "loss": 2.2539, + "step": 717 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009653080985998009, + "loss": 2.2432, + "step": 718 + }, + { + "epoch": 0.15, + "learning_rate": 0.000965187699664188, + "loss": 2.1641, + "step": 719 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009650670997018067, + "loss": 2.2188, + "step": 720 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009649462987647732, + "loss": 2.2598, + "step": 721 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009648252969052904, + "loss": 2.1973, + "step": 722 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009647040941756482, + "loss": 2.1973, + "step": 723 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009645826906282233, + "loss": 2.1875, + "step": 724 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009644610863154795, + "loss": 2.252, + "step": 725 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009643392812899665, + "loss": 2.2188, + "step": 726 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009642172756043216, + "loss": 2.2051, + "step": 727 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009640950693112684, + "loss": 2.2051, + "step": 728 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009639726624636174, + "loss": 2.2617, + "step": 729 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009638500551142653, + "loss": 2.2188, + "step": 730 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009637272473161962, + "loss": 2.2832, + "step": 731 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009636042391224802, + "loss": 2.2383, + "step": 732 + }, + { + "epoch": 0.15, + "learning_rate": 0.000963481030586274, + "loss": 2.1934, + "step": 733 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009633576217608215, + "loss": 2.2402, + "step": 734 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009632340126994526, + "loss": 2.1523, + "step": 735 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009631102034555839, + "loss": 2.252, + "step": 736 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009629861940827182, + "loss": 2.209, + "step": 737 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009628619846344453, + "loss": 2.2188, + "step": 738 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009627375751644411, + "loss": 2.1816, + "step": 739 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009626129657264681, + "loss": 2.2285, + "step": 740 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009624881563743754, + "loss": 2.1992, + "step": 741 + }, + { + "epoch": 0.15, + "learning_rate": 0.000962363147162098, + "loss": 2.1836, + "step": 742 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009622379381436575, + "loss": 2.2285, + "step": 743 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009621125293731619, + "loss": 2.2471, + "step": 744 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009619869209048058, + "loss": 2.2188, + "step": 745 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009618611127928694, + "loss": 2.1914, + "step": 746 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009617351050917195, + "loss": 2.1895, + "step": 747 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009616088978558099, + "loss": 2.21, + "step": 748 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009614824911396792, + "loss": 2.2031, + "step": 749 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009613558849979534, + "loss": 2.2031, + "step": 750 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009612290794853438, + "loss": 2.1602, + "step": 751 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009611020746566487, + "loss": 2.2656, + "step": 752 + }, + { + "epoch": 0.15, + "learning_rate": 0.000960974870566752, + "loss": 2.2559, + "step": 753 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009608474672706239, + "loss": 2.2559, + "step": 754 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009607198648233204, + "loss": 2.2148, + "step": 755 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009605920632799838, + "loss": 2.252, + "step": 756 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009604640626958427, + "loss": 2.2012, + "step": 757 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009603358631262111, + "loss": 2.2812, + "step": 758 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009602074646264893, + "loss": 2.209, + "step": 759 + }, + { + "epoch": 0.15, + "learning_rate": 0.000960078867252164, + "loss": 2.1963, + "step": 760 + }, + { + "epoch": 0.15, + "learning_rate": 0.000959950071058807, + "loss": 2.2539, + "step": 761 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009598210761020765, + "loss": 2.1914, + "step": 762 + }, + { + "epoch": 0.15, + "learning_rate": 0.0009596918824377168, + "loss": 2.1973, + "step": 763 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009595624901215573, + "loss": 2.1689, + "step": 764 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009594328992095144, + "loss": 2.2051, + "step": 765 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009593031097575893, + "loss": 2.2188, + "step": 766 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009591731218218693, + "loss": 2.2461, + "step": 767 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009590429354585277, + "loss": 2.2324, + "step": 768 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009589125507238233, + "loss": 2.1416, + "step": 769 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009587819676741008, + "loss": 2.248, + "step": 770 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009586511863657906, + "loss": 2.2207, + "step": 771 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009585202068554083, + "loss": 2.2637, + "step": 772 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009583890291995561, + "loss": 2.1816, + "step": 773 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009582576534549208, + "loss": 2.166, + "step": 774 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009581260796782754, + "loss": 2.209, + "step": 775 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009579943079264784, + "loss": 2.3008, + "step": 776 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009578623382564739, + "loss": 2.2227, + "step": 777 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009577301707252913, + "loss": 2.2041, + "step": 778 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009575978053900456, + "loss": 2.2227, + "step": 779 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009574652423079375, + "loss": 2.2207, + "step": 780 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009573324815362528, + "loss": 2.2344, + "step": 781 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009571995231323629, + "loss": 2.2363, + "step": 782 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009570663671537248, + "loss": 2.1504, + "step": 783 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009569330136578803, + "loss": 2.1621, + "step": 784 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009567994627024572, + "loss": 2.1895, + "step": 785 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009566657143451685, + "loss": 2.2734, + "step": 786 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009565317686438122, + "loss": 2.2773, + "step": 787 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009563976256562717, + "loss": 2.248, + "step": 788 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009562632854405158, + "loss": 2.1582, + "step": 789 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009561287480545984, + "loss": 2.2344, + "step": 790 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009559940135566587, + "loss": 2.2012, + "step": 791 + }, + { + "epoch": 0.16, + "learning_rate": 0.000955859082004921, + "loss": 2.25, + "step": 792 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009557239534576948, + "loss": 2.1953, + "step": 793 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009555886279733746, + "loss": 2.2383, + "step": 794 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009554531056104403, + "loss": 2.2656, + "step": 795 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009553173864274567, + "loss": 2.2832, + "step": 796 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009551814704830733, + "loss": 2.2441, + "step": 797 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009550453578360254, + "loss": 2.25, + "step": 798 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009549090485451327, + "loss": 2.1836, + "step": 799 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009547725426693001, + "loss": 2.1367, + "step": 800 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009546358402675173, + "loss": 2.2246, + "step": 801 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009544989413988592, + "loss": 2.2109, + "step": 802 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009543618461224854, + "loss": 2.2871, + "step": 803 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009542245544976402, + "loss": 2.3027, + "step": 804 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009540870665836534, + "loss": 2.1797, + "step": 805 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009539493824399388, + "loss": 2.2188, + "step": 806 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009538115021259956, + "loss": 2.2773, + "step": 807 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009536734257014074, + "loss": 2.1523, + "step": 808 + }, + { + "epoch": 0.16, + "learning_rate": 0.000953535153225843, + "loss": 2.2461, + "step": 809 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009533966847590551, + "loss": 2.2188, + "step": 810 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009532580203608823, + "loss": 2.251, + "step": 811 + }, + { + "epoch": 0.16, + "learning_rate": 0.0009531191600912468, + "loss": 2.1895, + "step": 812 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009529801040101558, + "loss": 2.1592, + "step": 813 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009528408521777012, + "loss": 2.252, + "step": 814 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009527014046540597, + "loss": 2.1885, + "step": 815 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009525617614994921, + "loss": 2.2637, + "step": 816 + }, + { + "epoch": 0.17, + "learning_rate": 0.000952421922774344, + "loss": 2.1494, + "step": 817 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009522818885390455, + "loss": 2.1963, + "step": 818 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009521416588541111, + "loss": 2.1738, + "step": 819 + }, + { + "epoch": 0.17, + "learning_rate": 0.00095200123378014, + "loss": 2.1377, + "step": 820 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009518606133778153, + "loss": 2.2285, + "step": 821 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009517197977079052, + "loss": 2.25, + "step": 822 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009515787868312618, + "loss": 2.1855, + "step": 823 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009514375808088218, + "loss": 2.1514, + "step": 824 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009512961797016062, + "loss": 2.1758, + "step": 825 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009511545835707199, + "loss": 2.2148, + "step": 826 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009510127924773528, + "loss": 2.291, + "step": 827 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009508708064827785, + "loss": 2.2441, + "step": 828 + }, + { + "epoch": 0.17, + "learning_rate": 0.000950728625648355, + "loss": 2.2422, + "step": 829 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009505862500355246, + "loss": 2.1992, + "step": 830 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009504436797058136, + "loss": 2.25, + "step": 831 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009503009147208323, + "loss": 2.2188, + "step": 832 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009501579551422759, + "loss": 2.1797, + "step": 833 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009500148010319225, + "loss": 2.1875, + "step": 834 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009498714524516352, + "loss": 2.127, + "step": 835 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009497279094633607, + "loss": 2.1738, + "step": 836 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009495841721291299, + "loss": 2.165, + "step": 837 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009494402405110579, + "loss": 2.2012, + "step": 838 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009492961146713428, + "loss": 2.1318, + "step": 839 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009491517946722681, + "loss": 2.2188, + "step": 840 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009490072805761998, + "loss": 2.2578, + "step": 841 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009488625724455887, + "loss": 2.2285, + "step": 842 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009487176703429691, + "loss": 2.2402, + "step": 843 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009485725743309593, + "loss": 2.1641, + "step": 844 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009484272844722609, + "loss": 2.2383, + "step": 845 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009482818008296599, + "loss": 2.2188, + "step": 846 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009481361234660257, + "loss": 2.1855, + "step": 847 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009479902524443116, + "loss": 2.2148, + "step": 848 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009478441878275543, + "loss": 2.1953, + "step": 849 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009476979296788746, + "loss": 2.2148, + "step": 850 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009475514780614764, + "loss": 2.209, + "step": 851 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009474048330386475, + "loss": 2.1836, + "step": 852 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009472579946737594, + "loss": 2.1982, + "step": 853 + }, + { + "epoch": 0.17, + "learning_rate": 0.000947110963030267, + "loss": 2.2266, + "step": 854 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009469637381717085, + "loss": 2.1816, + "step": 855 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009468163201617061, + "loss": 2.1855, + "step": 856 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009466687090639652, + "loss": 2.1699, + "step": 857 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009465209049422744, + "loss": 2.2246, + "step": 858 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009463729078605061, + "loss": 2.1914, + "step": 859 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009462247178826158, + "loss": 2.2285, + "step": 860 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009460763350726428, + "loss": 2.1738, + "step": 861 + }, + { + "epoch": 0.17, + "learning_rate": 0.0009459277594947091, + "loss": 2.1738, + "step": 862 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009457789912130206, + "loss": 2.1689, + "step": 863 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009456300302918658, + "loss": 2.125, + "step": 864 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009454808767956173, + "loss": 2.3477, + "step": 865 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009453315307887303, + "loss": 2.2207, + "step": 866 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009451819923357434, + "loss": 2.2012, + "step": 867 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009450322615012782, + "loss": 2.1836, + "step": 868 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009448823383500396, + "loss": 2.25, + "step": 869 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009447322229468156, + "loss": 2.2334, + "step": 870 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009445819153564774, + "loss": 2.2578, + "step": 871 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009444314156439787, + "loss": 2.1797, + "step": 872 + }, + { + "epoch": 0.18, + "learning_rate": 0.000944280723874357, + "loss": 2.2568, + "step": 873 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009441298401127322, + "loss": 2.1836, + "step": 874 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009439787644243078, + "loss": 2.1211, + "step": 875 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009438274968743692, + "loss": 2.1543, + "step": 876 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009436760375282858, + "loss": 2.207, + "step": 877 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009435243864515093, + "loss": 2.291, + "step": 878 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009433725437095743, + "loss": 2.2168, + "step": 879 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009432205093680983, + "loss": 2.1973, + "step": 880 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009430682834927817, + "loss": 2.252, + "step": 881 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009429158661494077, + "loss": 2.2354, + "step": 882 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009427632574038418, + "loss": 2.2871, + "step": 883 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009426104573220326, + "loss": 2.3105, + "step": 884 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009424574659700116, + "loss": 2.1621, + "step": 885 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009423042834138924, + "loss": 2.207, + "step": 886 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009421509097198715, + "loss": 2.1641, + "step": 887 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009419973449542279, + "loss": 2.2432, + "step": 888 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009418435891833236, + "loss": 2.1973, + "step": 889 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009416896424736025, + "loss": 2.1777, + "step": 890 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009415355048915915, + "loss": 2.1133, + "step": 891 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009413811765038995, + "loss": 2.1709, + "step": 892 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009412266573772186, + "loss": 2.1777, + "step": 893 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009410719475783225, + "loss": 2.2539, + "step": 894 + }, + { + "epoch": 0.18, + "learning_rate": 0.000940917047174068, + "loss": 2.1777, + "step": 895 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009407619562313936, + "loss": 2.3145, + "step": 896 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009406066748173208, + "loss": 2.2891, + "step": 897 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009404512029989527, + "loss": 2.1816, + "step": 898 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009402955408434755, + "loss": 2.1484, + "step": 899 + }, + { + "epoch": 0.18, + "learning_rate": 0.000940139688418157, + "loss": 2.1543, + "step": 900 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009399836457903477, + "loss": 2.1865, + "step": 901 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009398274130274798, + "loss": 2.2188, + "step": 902 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009396709901970678, + "loss": 2.2539, + "step": 903 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009395143773667088, + "loss": 2.1553, + "step": 904 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009393575746040813, + "loss": 2.2285, + "step": 905 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009392005819769465, + "loss": 2.2891, + "step": 906 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009390433995531474, + "loss": 2.1309, + "step": 907 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009388860274006087, + "loss": 2.2754, + "step": 908 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009387284655873376, + "loss": 2.2168, + "step": 909 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009385707141814231, + "loss": 2.2051, + "step": 910 + }, + { + "epoch": 0.18, + "learning_rate": 0.0009384127732510361, + "loss": 2.2578, + "step": 911 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009382546428644291, + "loss": 2.1953, + "step": 912 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009380963230899371, + "loss": 2.1865, + "step": 913 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009379378139959764, + "loss": 2.291, + "step": 914 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009377791156510454, + "loss": 2.2109, + "step": 915 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009376202281237241, + "loss": 2.1504, + "step": 916 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009374611514826745, + "loss": 2.25, + "step": 917 + }, + { + "epoch": 0.19, + "learning_rate": 0.00093730188579664, + "loss": 2.2422, + "step": 918 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009371424311344458, + "loss": 2.1279, + "step": 919 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009369827875649992, + "loss": 2.1914, + "step": 920 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009368229551572884, + "loss": 2.2246, + "step": 921 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009366629339803836, + "loss": 2.1406, + "step": 922 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009365027241034367, + "loss": 2.1914, + "step": 923 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009363423255956809, + "loss": 2.2051, + "step": 924 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009361817385264311, + "loss": 2.2891, + "step": 925 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009360209629650834, + "loss": 2.2119, + "step": 926 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009358599989811158, + "loss": 2.1367, + "step": 927 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009356988466440872, + "loss": 2.2256, + "step": 928 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009355375060236383, + "loss": 2.1836, + "step": 929 + }, + { + "epoch": 0.19, + "learning_rate": 0.000935375977189491, + "loss": 2.2148, + "step": 930 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009352142602114486, + "loss": 2.1934, + "step": 931 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009350523551593957, + "loss": 2.2422, + "step": 932 + }, + { + "epoch": 0.19, + "learning_rate": 0.000934890262103298, + "loss": 2.168, + "step": 933 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009347279811132027, + "loss": 2.082, + "step": 934 + }, + { + "epoch": 0.19, + "learning_rate": 0.000934565512259238, + "loss": 2.2246, + "step": 935 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009344028556116135, + "loss": 2.2012, + "step": 936 + }, + { + "epoch": 0.19, + "learning_rate": 0.00093424001124062, + "loss": 2.2148, + "step": 937 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009340769792166288, + "loss": 2.2217, + "step": 938 + }, + { + "epoch": 0.19, + "learning_rate": 0.000933913759610093, + "loss": 2.1045, + "step": 939 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009337503524915467, + "loss": 2.2031, + "step": 940 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009335867579316047, + "loss": 2.1631, + "step": 941 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009334229760009628, + "loss": 2.21, + "step": 942 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009332590067703981, + "loss": 2.1494, + "step": 943 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009330948503107684, + "loss": 2.2129, + "step": 944 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009329305066930125, + "loss": 2.1816, + "step": 945 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009327659759881499, + "loss": 2.2422, + "step": 946 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009326012582672814, + "loss": 2.1846, + "step": 947 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009324363536015878, + "loss": 2.2051, + "step": 948 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009322712620623318, + "loss": 2.1348, + "step": 949 + }, + { + "epoch": 0.19, + "learning_rate": 0.000932105983720856, + "loss": 2.1582, + "step": 950 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009319405186485838, + "loss": 2.1992, + "step": 951 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009317748669170198, + "loss": 2.1504, + "step": 952 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009316090285977486, + "loss": 2.1436, + "step": 953 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009314430037624362, + "loss": 2.1865, + "step": 954 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009312767924828283, + "loss": 2.168, + "step": 955 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009311103948307519, + "loss": 2.1309, + "step": 956 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009309438108781142, + "loss": 2.1992, + "step": 957 + }, + { + "epoch": 0.19, + "learning_rate": 0.000930777040696903, + "loss": 2.2051, + "step": 958 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009306100843591867, + "loss": 2.1426, + "step": 959 + }, + { + "epoch": 0.19, + "learning_rate": 0.0009304429419371139, + "loss": 2.168, + "step": 960 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009302756135029137, + "loss": 2.1934, + "step": 961 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009301080991288957, + "loss": 2.2539, + "step": 962 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009299403988874495, + "loss": 2.2539, + "step": 963 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009297725128510456, + "loss": 2.0898, + "step": 964 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009296044410922344, + "loss": 2.1396, + "step": 965 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009294361836836465, + "loss": 2.2129, + "step": 966 + }, + { + "epoch": 0.2, + "learning_rate": 0.000929267740697993, + "loss": 2.1621, + "step": 967 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009290991122080649, + "loss": 2.1719, + "step": 968 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009289302982867335, + "loss": 2.2559, + "step": 969 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009287612990069508, + "loss": 2.2246, + "step": 970 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009285921144417475, + "loss": 2.2207, + "step": 971 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009284227446642357, + "loss": 2.1514, + "step": 972 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009282531897476071, + "loss": 2.2324, + "step": 973 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009280834497651332, + "loss": 2.2383, + "step": 974 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009279135247901658, + "loss": 2.1953, + "step": 975 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009277434148961363, + "loss": 2.1934, + "step": 976 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009275731201565563, + "loss": 2.25, + "step": 977 + }, + { + "epoch": 0.2, + "learning_rate": 0.000927402640645017, + "loss": 2.1758, + "step": 978 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009272319764351899, + "loss": 2.1758, + "step": 979 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009270611276008258, + "loss": 2.2168, + "step": 980 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009268900942157556, + "loss": 2.1963, + "step": 981 + }, + { + "epoch": 0.2, + "learning_rate": 0.00092671887635389, + "loss": 2.2139, + "step": 982 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009265474740892192, + "loss": 2.1855, + "step": 983 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009263758874958131, + "loss": 2.2598, + "step": 984 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009262041166478213, + "loss": 2.1045, + "step": 985 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009260321616194733, + "loss": 2.1855, + "step": 986 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009258600224850777, + "loss": 2.2285, + "step": 987 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009256876993190231, + "loss": 2.1133, + "step": 988 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009255151921957772, + "loss": 2.1309, + "step": 989 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009253425011898878, + "loss": 2.1729, + "step": 990 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009251696263759815, + "loss": 2.1289, + "step": 991 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009249965678287647, + "loss": 2.1982, + "step": 992 + }, + { + "epoch": 0.2, + "learning_rate": 0.000924823325623023, + "loss": 2.2012, + "step": 993 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009246498998336218, + "loss": 2.2754, + "step": 994 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009244762905355053, + "loss": 2.1396, + "step": 995 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009243024978036974, + "loss": 2.1357, + "step": 996 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009241285217133009, + "loss": 2.2344, + "step": 997 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009239543623394982, + "loss": 2.1309, + "step": 998 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009237800197575505, + "loss": 2.252, + "step": 999 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009236054940427987, + "loss": 2.2266, + "step": 1000 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009234307852706624, + "loss": 2.252, + "step": 1001 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009232558935166407, + "loss": 2.293, + "step": 1002 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009230808188563109, + "loss": 2.2139, + "step": 1003 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009229055613653308, + "loss": 2.168, + "step": 1004 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009227301211194356, + "loss": 2.126, + "step": 1005 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009225544981944409, + "loss": 2.2656, + "step": 1006 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009223786926662403, + "loss": 2.207, + "step": 1007 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009222027046108066, + "loss": 2.2266, + "step": 1008 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009220265341041916, + "loss": 2.2754, + "step": 1009 + }, + { + "epoch": 0.2, + "learning_rate": 0.0009218501812225257, + "loss": 2.1475, + "step": 1010 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009216736460420183, + "loss": 2.2188, + "step": 1011 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009214969286389577, + "loss": 2.1699, + "step": 1012 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009213200290897103, + "loss": 2.1328, + "step": 1013 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009211429474707221, + "loss": 2.1699, + "step": 1014 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009209656838585174, + "loss": 2.1494, + "step": 1015 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009207882383296988, + "loss": 2.1289, + "step": 1016 + }, + { + "epoch": 0.21, + "learning_rate": 0.000920610610960948, + "loss": 2.2363, + "step": 1017 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009204328018290252, + "loss": 2.209, + "step": 1018 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009202548110107688, + "loss": 2.207, + "step": 1019 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009200766385830962, + "loss": 2.1328, + "step": 1020 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009198982846230028, + "loss": 2.1846, + "step": 1021 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009197197492075631, + "loss": 2.2207, + "step": 1022 + }, + { + "epoch": 0.21, + "learning_rate": 0.000919541032413929, + "loss": 2.1426, + "step": 1023 + }, + { + "epoch": 0.21, + "learning_rate": 0.000919362134319332, + "loss": 2.127, + "step": 1024 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009191830550010811, + "loss": 2.1318, + "step": 1025 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009190037945365637, + "loss": 2.1836, + "step": 1026 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009188243530032457, + "loss": 2.127, + "step": 1027 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009186447304786714, + "loss": 2.2217, + "step": 1028 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009184649270404628, + "loss": 2.1367, + "step": 1029 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009182849427663204, + "loss": 2.1348, + "step": 1030 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009181047777340232, + "loss": 2.2539, + "step": 1031 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009179244320214275, + "loss": 2.2129, + "step": 1032 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009177439057064682, + "loss": 2.209, + "step": 1033 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009175631988671583, + "loss": 2.2383, + "step": 1034 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009173823115815886, + "loss": 2.1582, + "step": 1035 + }, + { + "epoch": 0.21, + "learning_rate": 0.000917201243927928, + "loss": 2.2207, + "step": 1036 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009170199959844231, + "loss": 2.1885, + "step": 1037 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009168385678293986, + "loss": 2.1562, + "step": 1038 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009166569595412575, + "loss": 2.1729, + "step": 1039 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009164751711984798, + "loss": 2.0889, + "step": 1040 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009162932028796239, + "loss": 2.1074, + "step": 1041 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009161110546633258, + "loss": 2.2363, + "step": 1042 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009159287266282992, + "loss": 2.1846, + "step": 1043 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009157462188533356, + "loss": 2.2168, + "step": 1044 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009155635314173039, + "loss": 2.1318, + "step": 1045 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009153806643991514, + "loss": 2.1289, + "step": 1046 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009151976178779019, + "loss": 2.1719, + "step": 1047 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009150143919326577, + "loss": 2.1641, + "step": 1048 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009148309866425981, + "loss": 2.2051, + "step": 1049 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009146474020869801, + "loss": 2.167, + "step": 1050 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009144636383451384, + "loss": 2.1445, + "step": 1051 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009142796954964845, + "loss": 2.0801, + "step": 1052 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009140955736205078, + "loss": 2.1484, + "step": 1053 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009139112727967751, + "loss": 2.1602, + "step": 1054 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009137267931049304, + "loss": 2.1631, + "step": 1055 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009135421346246948, + "loss": 2.166, + "step": 1056 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009133572974358668, + "loss": 2.1621, + "step": 1057 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009131722816183224, + "loss": 2.1328, + "step": 1058 + }, + { + "epoch": 0.21, + "learning_rate": 0.0009129870872520143, + "loss": 2.2051, + "step": 1059 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009128017144169727, + "loss": 2.1289, + "step": 1060 + }, + { + "epoch": 0.22, + "learning_rate": 0.000912616163193305, + "loss": 2.2285, + "step": 1061 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009124304336611953, + "loss": 2.1562, + "step": 1062 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009122445259009052, + "loss": 2.1445, + "step": 1063 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009120584399927727, + "loss": 2.2148, + "step": 1064 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009118721760172137, + "loss": 2.1367, + "step": 1065 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009116857340547202, + "loss": 2.1816, + "step": 1066 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009114991141858617, + "loss": 2.166, + "step": 1067 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009113123164912842, + "loss": 2.2207, + "step": 1068 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009111253410517106, + "loss": 2.1602, + "step": 1069 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009109381879479407, + "loss": 2.2109, + "step": 1070 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009107508572608512, + "loss": 2.2695, + "step": 1071 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009105633490713952, + "loss": 2.168, + "step": 1072 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009103756634606029, + "loss": 2.1709, + "step": 1073 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009101878005095812, + "loss": 2.207, + "step": 1074 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009099997602995128, + "loss": 2.1494, + "step": 1075 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009098115429116582, + "loss": 2.1934, + "step": 1076 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009096231484273533, + "loss": 2.1074, + "step": 1077 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009094345769280119, + "loss": 2.1846, + "step": 1078 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009092458284951229, + "loss": 2.1885, + "step": 1079 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009090569032102525, + "loss": 2.1738, + "step": 1080 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009088678011550431, + "loss": 2.2227, + "step": 1081 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009086785224112132, + "loss": 2.1924, + "step": 1082 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009084890670605586, + "loss": 2.1699, + "step": 1083 + }, + { + "epoch": 0.22, + "learning_rate": 0.00090829943518495, + "loss": 2.1953, + "step": 1084 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009081096268663358, + "loss": 2.2031, + "step": 1085 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009079196421867395, + "loss": 2.2197, + "step": 1086 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009077294812282615, + "loss": 2.1641, + "step": 1087 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009075391440730783, + "loss": 2.165, + "step": 1088 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009073486308034422, + "loss": 2.21, + "step": 1089 + }, + { + "epoch": 0.22, + "learning_rate": 0.000907157941501682, + "loss": 2.168, + "step": 1090 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009069670762502023, + "loss": 2.1162, + "step": 1091 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009067760351314837, + "loss": 2.25, + "step": 1092 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009065848182280834, + "loss": 2.1426, + "step": 1093 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009063934256226337, + "loss": 2.1377, + "step": 1094 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009062018573978434, + "loss": 2.2217, + "step": 1095 + }, + { + "epoch": 0.22, + "learning_rate": 0.000906010113636497, + "loss": 2.1826, + "step": 1096 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009058181944214547, + "loss": 2.2461, + "step": 1097 + }, + { + "epoch": 0.22, + "learning_rate": 0.000905626099835653, + "loss": 2.1875, + "step": 1098 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009054338299621037, + "loss": 2.2598, + "step": 1099 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009052413848838945, + "loss": 2.1641, + "step": 1100 + }, + { + "epoch": 0.22, + "learning_rate": 0.000905048764684189, + "loss": 2.124, + "step": 1101 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009048559694462262, + "loss": 2.0977, + "step": 1102 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009046629992533208, + "loss": 2.1211, + "step": 1103 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009044698541888634, + "loss": 2.1387, + "step": 1104 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009042765343363197, + "loss": 2.1904, + "step": 1105 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009040830397792313, + "loss": 2.1621, + "step": 1106 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009038893706012152, + "loss": 2.1719, + "step": 1107 + }, + { + "epoch": 0.22, + "learning_rate": 0.0009036955268859638, + "loss": 2.1582, + "step": 1108 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009035015087172449, + "loss": 2.0938, + "step": 1109 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009033073161789019, + "loss": 2.0957, + "step": 1110 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009031129493548529, + "loss": 2.1377, + "step": 1111 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009029184083290926, + "loss": 2.2402, + "step": 1112 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009027236931856894, + "loss": 2.2217, + "step": 1113 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009025288040087885, + "loss": 2.1719, + "step": 1114 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009023337408826089, + "loss": 2.2148, + "step": 1115 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009021385038914457, + "loss": 2.1934, + "step": 1116 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009019430931196689, + "loss": 2.1855, + "step": 1117 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009017475086517233, + "loss": 2.2402, + "step": 1118 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009015517505721293, + "loss": 2.1426, + "step": 1119 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009013558189654818, + "loss": 2.1943, + "step": 1120 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009011597139164511, + "loss": 2.1611, + "step": 1121 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009009634355097822, + "loss": 2.1621, + "step": 1122 + }, + { + "epoch": 0.23, + "learning_rate": 0.000900766983830295, + "loss": 2.1074, + "step": 1123 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009005703589628844, + "loss": 2.2559, + "step": 1124 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009003735609925201, + "loss": 2.1104, + "step": 1125 + }, + { + "epoch": 0.23, + "learning_rate": 0.0009001765900042468, + "loss": 2.2402, + "step": 1126 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008999794460831835, + "loss": 2.0957, + "step": 1127 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008997821293145243, + "loss": 2.1191, + "step": 1128 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008995846397835381, + "loss": 2.1875, + "step": 1129 + }, + { + "epoch": 0.23, + "learning_rate": 0.000899386977575568, + "loss": 2.2031, + "step": 1130 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008991891427760321, + "loss": 2.1602, + "step": 1131 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008989911354704229, + "loss": 2.0957, + "step": 1132 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008987929557443075, + "loss": 2.1689, + "step": 1133 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008985946036833278, + "loss": 2.1768, + "step": 1134 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008983960793731995, + "loss": 2.0723, + "step": 1135 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008981973828997134, + "loss": 2.1279, + "step": 1136 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008979985143487344, + "loss": 2.0947, + "step": 1137 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008977994738062016, + "loss": 2.1426, + "step": 1138 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008976002613581288, + "loss": 2.1104, + "step": 1139 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008974008770906039, + "loss": 2.2246, + "step": 1140 + }, + { + "epoch": 0.23, + "learning_rate": 0.000897201321089789, + "loss": 2.1729, + "step": 1141 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008970015934419204, + "loss": 2.207, + "step": 1142 + }, + { + "epoch": 0.23, + "learning_rate": 0.000896801694233309, + "loss": 2.2168, + "step": 1143 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008966016235503389, + "loss": 2.082, + "step": 1144 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008964013814794693, + "loss": 2.2129, + "step": 1145 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008962009681072332, + "loss": 2.1328, + "step": 1146 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008960003835202369, + "loss": 2.1426, + "step": 1147 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008957996278051617, + "loss": 2.1074, + "step": 1148 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008955987010487623, + "loss": 2.1729, + "step": 1149 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008953976033378674, + "loss": 2.1592, + "step": 1150 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008951963347593796, + "loss": 2.2344, + "step": 1151 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008949948954002756, + "loss": 2.168, + "step": 1152 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008947932853476051, + "loss": 2.1543, + "step": 1153 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008945915046884926, + "loss": 2.1836, + "step": 1154 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008943895535101356, + "loss": 2.1963, + "step": 1155 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008941874318998057, + "loss": 2.1328, + "step": 1156 + }, + { + "epoch": 0.23, + "learning_rate": 0.0008939851399448477, + "loss": 2.1855, + "step": 1157 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008937826777326807, + "loss": 2.1143, + "step": 1158 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008935800453507965, + "loss": 2.1875, + "step": 1159 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008933772428867613, + "loss": 2.1621, + "step": 1160 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008931742704282139, + "loss": 2.1875, + "step": 1161 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008929711280628676, + "loss": 2.2793, + "step": 1162 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008927678158785085, + "loss": 2.2109, + "step": 1163 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008925643339629957, + "loss": 2.1748, + "step": 1164 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008923606824042627, + "loss": 2.0762, + "step": 1165 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008921568612903153, + "loss": 2.168, + "step": 1166 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008919528707092332, + "loss": 2.124, + "step": 1167 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008917487107491693, + "loss": 2.2021, + "step": 1168 + }, + { + "epoch": 0.24, + "learning_rate": 0.000891544381498349, + "loss": 2.209, + "step": 1169 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008913398830450721, + "loss": 2.1816, + "step": 1170 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008911352154777101, + "loss": 2.1162, + "step": 1171 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008909303788847087, + "loss": 2.209, + "step": 1172 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008907253733545864, + "loss": 2.1768, + "step": 1173 + }, + { + "epoch": 0.24, + "learning_rate": 0.000890520198975934, + "loss": 2.1387, + "step": 1174 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008903148558374162, + "loss": 2.1455, + "step": 1175 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008901093440277701, + "loss": 2.1836, + "step": 1176 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008899036636358058, + "loss": 2.2324, + "step": 1177 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008896978147504063, + "loss": 2.1836, + "step": 1178 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008894917974605275, + "loss": 2.2402, + "step": 1179 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008892856118551978, + "loss": 2.1152, + "step": 1180 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008890792580235183, + "loss": 2.1367, + "step": 1181 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008888727360546635, + "loss": 2.041, + "step": 1182 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008886660460378798, + "loss": 2.1523, + "step": 1183 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008884591880624861, + "loss": 2.1709, + "step": 1184 + }, + { + "epoch": 0.24, + "learning_rate": 0.000888252162217875, + "loss": 2.1924, + "step": 1185 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008880449685935102, + "loss": 2.1895, + "step": 1186 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008878376072789291, + "loss": 2.1699, + "step": 1187 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008876300783637408, + "loss": 2.1748, + "step": 1188 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008874223819376272, + "loss": 2.1797, + "step": 1189 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008872145180903423, + "loss": 2.1641, + "step": 1190 + }, + { + "epoch": 0.24, + "learning_rate": 0.000887006486911713, + "loss": 2.2324, + "step": 1191 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008867982884916376, + "loss": 2.2539, + "step": 1192 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008865899229200878, + "loss": 2.0557, + "step": 1193 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008863813902871067, + "loss": 2.1943, + "step": 1194 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008861726906828096, + "loss": 2.2246, + "step": 1195 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008859638241973845, + "loss": 2.1816, + "step": 1196 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008857547909210912, + "loss": 2.2002, + "step": 1197 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008855455909442615, + "loss": 2.0742, + "step": 1198 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008853362243572994, + "loss": 2.2871, + "step": 1199 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008851266912506806, + "loss": 2.1484, + "step": 1200 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008849169917149531, + "loss": 2.1758, + "step": 1201 + }, + { + "epoch": 0.24, + "learning_rate": 0.000884707125840737, + "loss": 2.1484, + "step": 1202 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008844970937187236, + "loss": 2.1328, + "step": 1203 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008842868954396766, + "loss": 2.1035, + "step": 1204 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008840765310944315, + "loss": 2.0947, + "step": 1205 + }, + { + "epoch": 0.24, + "learning_rate": 0.000883866000773895, + "loss": 2.1201, + "step": 1206 + }, + { + "epoch": 0.24, + "learning_rate": 0.0008836553045690462, + "loss": 2.2061, + "step": 1207 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008834444425709356, + "loss": 2.1641, + "step": 1208 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008832334148706853, + "loss": 2.1201, + "step": 1209 + }, + { + "epoch": 0.25, + "learning_rate": 0.000883022221559489, + "loss": 2.2051, + "step": 1210 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008828108627286122, + "loss": 2.165, + "step": 1211 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008825993384693917, + "loss": 2.1582, + "step": 1212 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008823876488732357, + "loss": 2.1953, + "step": 1213 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008821757940316242, + "loss": 2.1133, + "step": 1214 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008819637740361081, + "loss": 2.1816, + "step": 1215 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008817515889783105, + "loss": 2.1387, + "step": 1216 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008815392389499248, + "loss": 2.3008, + "step": 1217 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008813267240427164, + "loss": 2.2305, + "step": 1218 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008811140443485217, + "loss": 2.1758, + "step": 1219 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008809011999592484, + "loss": 2.1562, + "step": 1220 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008806881909668754, + "loss": 2.165, + "step": 1221 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008804750174634526, + "loss": 2.0996, + "step": 1222 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008802616795411009, + "loss": 2.1123, + "step": 1223 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008800481772920126, + "loss": 2.123, + "step": 1224 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008798345108084507, + "loss": 2.1426, + "step": 1225 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008796206801827493, + "loss": 2.1211, + "step": 1226 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008794066855073135, + "loss": 2.1484, + "step": 1227 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008791925268746192, + "loss": 2.1973, + "step": 1228 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008789782043772132, + "loss": 2.1445, + "step": 1229 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008787637181077129, + "loss": 2.1885, + "step": 1230 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008785490681588069, + "loss": 2.1982, + "step": 1231 + }, + { + "epoch": 0.25, + "learning_rate": 0.000878334254623254, + "loss": 2.2021, + "step": 1232 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008781192775938843, + "loss": 2.1572, + "step": 1233 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008779041371635981, + "loss": 2.1016, + "step": 1234 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008776888334253662, + "loss": 2.2422, + "step": 1235 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008774733664722305, + "loss": 2.207, + "step": 1236 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008772577363973033, + "loss": 2.2148, + "step": 1237 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008770419432937667, + "loss": 2.2227, + "step": 1238 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008768259872548742, + "loss": 2.1787, + "step": 1239 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008766098683739494, + "loss": 2.1357, + "step": 1240 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008763935867443858, + "loss": 2.251, + "step": 1241 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008761771424596476, + "loss": 2.168, + "step": 1242 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008759605356132698, + "loss": 2.2148, + "step": 1243 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008757437662988566, + "loss": 2.3203, + "step": 1244 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008755268346100834, + "loss": 2.126, + "step": 1245 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008753097406406948, + "loss": 2.1035, + "step": 1246 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008750924844845064, + "loss": 2.126, + "step": 1247 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008748750662354034, + "loss": 2.1631, + "step": 1248 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008746574859873416, + "loss": 2.1133, + "step": 1249 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008744397438343456, + "loss": 2.1738, + "step": 1250 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008742218398705114, + "loss": 2.1738, + "step": 1251 + }, + { + "epoch": 0.25, + "learning_rate": 0.000874003774190004, + "loss": 2.1045, + "step": 1252 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008737855468870587, + "loss": 2.1621, + "step": 1253 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008735671580559803, + "loss": 2.1182, + "step": 1254 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008733486077911439, + "loss": 2.1523, + "step": 1255 + }, + { + "epoch": 0.25, + "learning_rate": 0.0008731298961869938, + "loss": 2.1348, + "step": 1256 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008729110233380445, + "loss": 2.1084, + "step": 1257 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008726919893388798, + "loss": 2.1895, + "step": 1258 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008724727942841535, + "loss": 2.1768, + "step": 1259 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008722534382685888, + "loss": 2.1094, + "step": 1260 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008720339213869781, + "loss": 2.1084, + "step": 1261 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008718142437341843, + "loss": 2.1826, + "step": 1262 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008715944054051386, + "loss": 2.1699, + "step": 1263 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008713744064948425, + "loss": 2.1504, + "step": 1264 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008711542470983667, + "loss": 2.1396, + "step": 1265 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008709339273108507, + "loss": 2.1709, + "step": 1266 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008707134472275041, + "loss": 2.0762, + "step": 1267 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008704928069436054, + "loss": 2.1533, + "step": 1268 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008702720065545023, + "loss": 2.2275, + "step": 1269 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008700510461556117, + "loss": 2.0713, + "step": 1270 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008698299258424198, + "loss": 2.1641, + "step": 1271 + }, + { + "epoch": 0.26, + "learning_rate": 0.000869608645710482, + "loss": 2.1396, + "step": 1272 + }, + { + "epoch": 0.26, + "learning_rate": 0.000869387205855422, + "loss": 2.1562, + "step": 1273 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008691656063729334, + "loss": 2.2041, + "step": 1274 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008689438473587785, + "loss": 2.1533, + "step": 1275 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008687219289087885, + "loss": 2.1348, + "step": 1276 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008684998511188633, + "loss": 2.1895, + "step": 1277 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008682776140849722, + "loss": 2.2139, + "step": 1278 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008680552179031527, + "loss": 2.1562, + "step": 1279 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008678326626695114, + "loss": 2.1914, + "step": 1280 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008676099484802238, + "loss": 2.0693, + "step": 1281 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008673870754315336, + "loss": 2.1348, + "step": 1282 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008671640436197537, + "loss": 2.1377, + "step": 1283 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008669408531412651, + "loss": 2.2422, + "step": 1284 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008667175040925174, + "loss": 2.1484, + "step": 1285 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008664939965700296, + "loss": 2.1826, + "step": 1286 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008662703306703878, + "loss": 2.125, + "step": 1287 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008660465064902477, + "loss": 2.1084, + "step": 1288 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008658225241263329, + "loss": 2.1885, + "step": 1289 + }, + { + "epoch": 0.26, + "learning_rate": 0.000865598383675435, + "loss": 2.0977, + "step": 1290 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008653740852344149, + "loss": 2.1807, + "step": 1291 + }, + { + "epoch": 0.26, + "learning_rate": 0.000865149628900201, + "loss": 2.248, + "step": 1292 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008649250147697898, + "loss": 2.1592, + "step": 1293 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008647002429402467, + "loss": 2.1162, + "step": 1294 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008644753135087047, + "loss": 2.1621, + "step": 1295 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008642502265723653, + "loss": 2.1377, + "step": 1296 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008640249822284975, + "loss": 2.1729, + "step": 1297 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008637995805744387, + "loss": 2.0908, + "step": 1298 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008635740217075946, + "loss": 2.1494, + "step": 1299 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008633483057254381, + "loss": 2.2344, + "step": 1300 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008631224327255105, + "loss": 2.0938, + "step": 1301 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008628964028054208, + "loss": 2.1123, + "step": 1302 + }, + { + "epoch": 0.26, + "learning_rate": 0.000862670216062846, + "loss": 2.1162, + "step": 1303 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008624438725955305, + "loss": 2.0459, + "step": 1304 + }, + { + "epoch": 0.26, + "learning_rate": 0.0008622173725012867, + "loss": 2.124, + "step": 1305 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008619907158779947, + "loss": 2.0742, + "step": 1306 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008617639028236021, + "loss": 2.2012, + "step": 1307 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008615369334361241, + "loss": 2.1504, + "step": 1308 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008613098078136437, + "loss": 2.1992, + "step": 1309 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008610825260543109, + "loss": 2.084, + "step": 1310 + }, + { + "epoch": 0.27, + "learning_rate": 0.000860855088256344, + "loss": 2.0938, + "step": 1311 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008606274945180279, + "loss": 2.2041, + "step": 1312 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008603997449377152, + "loss": 2.0566, + "step": 1313 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008601718396138262, + "loss": 2.2012, + "step": 1314 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008599437786448479, + "loss": 2.1299, + "step": 1315 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008597155621293349, + "loss": 2.1777, + "step": 1316 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008594871901659089, + "loss": 2.2051, + "step": 1317 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008592586628532588, + "loss": 2.0771, + "step": 1318 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008590299802901411, + "loss": 2.1953, + "step": 1319 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008588011425753783, + "loss": 2.0117, + "step": 1320 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008585721498078611, + "loss": 2.1562, + "step": 1321 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008583430020865464, + "loss": 2.0488, + "step": 1322 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008581136995104585, + "loss": 2.1348, + "step": 1323 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008578842421786887, + "loss": 2.1426, + "step": 1324 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008576546301903947, + "loss": 2.084, + "step": 1325 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008574248636448013, + "loss": 2.1709, + "step": 1326 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008571949426412002, + "loss": 2.123, + "step": 1327 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008569648672789496, + "loss": 2.2002, + "step": 1328 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008567346376574746, + "loss": 2.0967, + "step": 1329 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008565042538762669, + "loss": 2.1035, + "step": 1330 + }, + { + "epoch": 0.27, + "learning_rate": 0.000856273716034885, + "loss": 2.1855, + "step": 1331 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008560430242329536, + "loss": 2.0771, + "step": 1332 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008558121785701643, + "loss": 2.1201, + "step": 1333 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008555811791462746, + "loss": 2.1895, + "step": 1334 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008553500260611094, + "loss": 2.1816, + "step": 1335 + }, + { + "epoch": 0.27, + "learning_rate": 0.000855118719414559, + "loss": 2.0947, + "step": 1336 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008548872593065811, + "loss": 2.1855, + "step": 1337 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008546556458371987, + "loss": 2.1094, + "step": 1338 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008544238791065016, + "loss": 2.1738, + "step": 1339 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008541919592146458, + "loss": 2.123, + "step": 1340 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008539598862618533, + "loss": 2.1582, + "step": 1341 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008537276603484128, + "loss": 2.1104, + "step": 1342 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008534952815746782, + "loss": 2.0547, + "step": 1343 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008532627500410702, + "loss": 2.1348, + "step": 1344 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008530300658480752, + "loss": 2.1084, + "step": 1345 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008527972290962455, + "loss": 2.083, + "step": 1346 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008525642398861997, + "loss": 2.1377, + "step": 1347 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008523310983186219, + "loss": 2.1064, + "step": 1348 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008520978044942622, + "loss": 2.0986, + "step": 1349 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008518643585139364, + "loss": 2.0986, + "step": 1350 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008516307604785261, + "loss": 2.1543, + "step": 1351 + }, + { + "epoch": 0.27, + "learning_rate": 0.000851397010488979, + "loss": 2.1475, + "step": 1352 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008511631086463074, + "loss": 2.1992, + "step": 1353 + }, + { + "epoch": 0.27, + "learning_rate": 0.0008509290550515907, + "loss": 2.1611, + "step": 1354 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008506948498059726, + "loss": 2.0312, + "step": 1355 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008504604930106628, + "loss": 2.1719, + "step": 1356 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008502259847669371, + "loss": 2.0986, + "step": 1357 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008499913251761353, + "loss": 2.0605, + "step": 1358 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008497565143396643, + "loss": 2.125, + "step": 1359 + }, + { + "epoch": 0.28, + "learning_rate": 0.000849521552358995, + "loss": 2.1201, + "step": 1360 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008492864393356644, + "loss": 2.1426, + "step": 1361 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008490511753712744, + "loss": 2.1387, + "step": 1362 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008488157605674924, + "loss": 2.2305, + "step": 1363 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008485801950260507, + "loss": 2.1162, + "step": 1364 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008483444788487469, + "loss": 2.1416, + "step": 1365 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008481086121374437, + "loss": 2.1182, + "step": 1366 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008478725949940686, + "loss": 2.168, + "step": 1367 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008476364275206149, + "loss": 2.1162, + "step": 1368 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008474001098191398, + "loss": 2.1025, + "step": 1369 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008471636419917659, + "loss": 2.1602, + "step": 1370 + }, + { + "epoch": 0.28, + "learning_rate": 0.000846927024140681, + "loss": 2.1953, + "step": 1371 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008466902563681371, + "loss": 2.125, + "step": 1372 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008464533387764517, + "loss": 2.0547, + "step": 1373 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008462162714680062, + "loss": 2.1064, + "step": 1374 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008459790545452475, + "loss": 2.1094, + "step": 1375 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008457416881106866, + "loss": 2.123, + "step": 1376 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008455041722668994, + "loss": 2.2158, + "step": 1377 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008452665071165262, + "loss": 2.1094, + "step": 1378 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008450286927622721, + "loss": 2.1348, + "step": 1379 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008447907293069063, + "loss": 1.9922, + "step": 1380 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008445526168532626, + "loss": 2.1025, + "step": 1381 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008443143555042393, + "loss": 2.1768, + "step": 1382 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008440759453627989, + "loss": 2.1621, + "step": 1383 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008438373865319683, + "loss": 2.0693, + "step": 1384 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008435986791148385, + "loss": 2.2158, + "step": 1385 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008433598232145647, + "loss": 2.0928, + "step": 1386 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008431208189343669, + "loss": 2.1621, + "step": 1387 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008428816663775281, + "loss": 2.1211, + "step": 1388 + }, + { + "epoch": 0.28, + "learning_rate": 0.000842642365647396, + "loss": 2.1152, + "step": 1389 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008424029168473828, + "loss": 2.1582, + "step": 1390 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008421633200809637, + "loss": 2.2158, + "step": 1391 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008419235754516788, + "loss": 2.0908, + "step": 1392 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008416836830631311, + "loss": 2.1152, + "step": 1393 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008414436430189882, + "loss": 2.1045, + "step": 1394 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008412034554229814, + "loss": 2.1582, + "step": 1395 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008409631203789056, + "loss": 2.1357, + "step": 1396 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008407226379906197, + "loss": 2.1855, + "step": 1397 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008404820083620455, + "loss": 2.1035, + "step": 1398 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008402412315971692, + "loss": 2.1777, + "step": 1399 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008400003078000407, + "loss": 2.1738, + "step": 1400 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008397592370747726, + "loss": 2.0889, + "step": 1401 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008395180195255419, + "loss": 2.127, + "step": 1402 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008392766552565885, + "loss": 2.1221, + "step": 1403 + }, + { + "epoch": 0.28, + "learning_rate": 0.0008390351443722157, + "loss": 2.1699, + "step": 1404 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008387934869767904, + "loss": 2.0342, + "step": 1405 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008385516831747429, + "loss": 2.0869, + "step": 1406 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008383097330705662, + "loss": 2.2363, + "step": 1407 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008380676367688171, + "loss": 2.1465, + "step": 1408 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008378253943741157, + "loss": 2.1035, + "step": 1409 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008375830059911442, + "loss": 2.127, + "step": 1410 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008373404717246494, + "loss": 2.1865, + "step": 1411 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008370977916794397, + "loss": 2.2129, + "step": 1412 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008368549659603876, + "loss": 2.0654, + "step": 1413 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008366119946724279, + "loss": 2.1357, + "step": 1414 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008363688779205585, + "loss": 2.0469, + "step": 1415 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008361256158098401, + "loss": 2.1543, + "step": 1416 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008358822084453964, + "loss": 2.1631, + "step": 1417 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008356386559324137, + "loss": 2.123, + "step": 1418 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008353949583761411, + "loss": 2.1572, + "step": 1419 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008351511158818904, + "loss": 2.168, + "step": 1420 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008349071285550358, + "loss": 2.1367, + "step": 1421 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008346629965010146, + "loss": 2.1621, + "step": 1422 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008344187198253259, + "loss": 2.1719, + "step": 1423 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008341742986335321, + "loss": 2.084, + "step": 1424 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008339297330312574, + "loss": 2.167, + "step": 1425 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008336850231241888, + "loss": 2.1396, + "step": 1426 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008334401690180755, + "loss": 2.2148, + "step": 1427 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008331951708187291, + "loss": 2.2051, + "step": 1428 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008329500286320233, + "loss": 2.0791, + "step": 1429 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008327047425638942, + "loss": 2.1777, + "step": 1430 + }, + { + "epoch": 0.29, + "learning_rate": 0.00083245931272034, + "loss": 2.1836, + "step": 1431 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008322137392074211, + "loss": 2.0967, + "step": 1432 + }, + { + "epoch": 0.29, + "learning_rate": 0.00083196802213126, + "loss": 2.0088, + "step": 1433 + }, + { + "epoch": 0.29, + "learning_rate": 0.000831722161598041, + "loss": 2.1094, + "step": 1434 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008314761577140105, + "loss": 2.1172, + "step": 1435 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008312300105854771, + "loss": 2.0596, + "step": 1436 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008309837203188111, + "loss": 2.125, + "step": 1437 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008307372870204446, + "loss": 2.1777, + "step": 1438 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008304907107968716, + "loss": 2.1582, + "step": 1439 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008302439917546478, + "loss": 2.1025, + "step": 1440 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008299971300003908, + "loss": 2.1211, + "step": 1441 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008297501256407795, + "loss": 2.1709, + "step": 1442 + }, + { + "epoch": 0.29, + "learning_rate": 0.000829502978782555, + "loss": 2.0312, + "step": 1443 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008292556895325194, + "loss": 2.0732, + "step": 1444 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008290082579975364, + "loss": 2.1152, + "step": 1445 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008287606842845319, + "loss": 2.126, + "step": 1446 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008285129685004922, + "loss": 2.0977, + "step": 1447 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008282651107524657, + "loss": 2.124, + "step": 1448 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008280171111475619, + "loss": 2.0928, + "step": 1449 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008277689697929516, + "loss": 2.1611, + "step": 1450 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008275206867958671, + "loss": 2.1689, + "step": 1451 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008272722622636015, + "loss": 2.1924, + "step": 1452 + }, + { + "epoch": 0.29, + "learning_rate": 0.0008270236963035092, + "loss": 2.1934, + "step": 1453 + }, + { + "epoch": 0.3, + "learning_rate": 0.000826774989023006, + "loss": 2.2363, + "step": 1454 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008265261405295685, + "loss": 2.1387, + "step": 1455 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008262771509307345, + "loss": 2.1211, + "step": 1456 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008260280203341026, + "loss": 2.1064, + "step": 1457 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008257787488473321, + "loss": 2.1387, + "step": 1458 + }, + { + "epoch": 0.3, + "learning_rate": 0.000825529336578144, + "loss": 2.125, + "step": 1459 + }, + { + "epoch": 0.3, + "learning_rate": 0.000825279783634319, + "loss": 2.0811, + "step": 1460 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008250300901236998, + "loss": 2.1016, + "step": 1461 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008247802561541889, + "loss": 2.0791, + "step": 1462 + }, + { + "epoch": 0.3, + "learning_rate": 0.00082453028183375, + "loss": 2.1484, + "step": 1463 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008242801672704072, + "loss": 2.0723, + "step": 1464 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008240299125722454, + "loss": 2.0811, + "step": 1465 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008237795178474098, + "loss": 2.125, + "step": 1466 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008235289832041065, + "loss": 2.1982, + "step": 1467 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008232783087506017, + "loss": 2.124, + "step": 1468 + }, + { + "epoch": 0.3, + "learning_rate": 0.000823027494595222, + "loss": 2.1465, + "step": 1469 + }, + { + "epoch": 0.3, + "learning_rate": 0.000822776540846355, + "loss": 2.1309, + "step": 1470 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008225254476124477, + "loss": 2.1035, + "step": 1471 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008222742150020082, + "loss": 2.082, + "step": 1472 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008220228431236041, + "loss": 2.1025, + "step": 1473 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008217713320858637, + "loss": 2.1553, + "step": 1474 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008215196819974754, + "loss": 2.1318, + "step": 1475 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008212678929671873, + "loss": 2.1348, + "step": 1476 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008210159651038081, + "loss": 2.0732, + "step": 1477 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008207638985162061, + "loss": 2.1084, + "step": 1478 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008205116933133094, + "loss": 2.1113, + "step": 1479 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008202593496041067, + "loss": 2.1191, + "step": 1480 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008200068674976458, + "loss": 2.1289, + "step": 1481 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008197542471030348, + "loss": 2.0566, + "step": 1482 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008195014885294414, + "loss": 2.0684, + "step": 1483 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008192485918860927, + "loss": 2.2266, + "step": 1484 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008189955572822761, + "loss": 2.168, + "step": 1485 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008187423848273379, + "loss": 2.0635, + "step": 1486 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008184890746306848, + "loss": 2.1367, + "step": 1487 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008182356268017822, + "loss": 2.0635, + "step": 1488 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008179820414501556, + "loss": 2.0996, + "step": 1489 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008177283186853895, + "loss": 2.0957, + "step": 1490 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008174744586171281, + "loss": 2.0537, + "step": 1491 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008172204613550747, + "loss": 2.166, + "step": 1492 + }, + { + "epoch": 0.3, + "learning_rate": 0.000816966327008992, + "loss": 2.1406, + "step": 1493 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008167120556887018, + "loss": 2.1367, + "step": 1494 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008164576475040854, + "loss": 2.0898, + "step": 1495 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008162031025650831, + "loss": 2.1514, + "step": 1496 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008159484209816941, + "loss": 2.0439, + "step": 1497 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008156936028639768, + "loss": 2.0625, + "step": 1498 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008154386483220486, + "loss": 2.0918, + "step": 1499 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008151835574660862, + "loss": 2.1318, + "step": 1500 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008149283304063243, + "loss": 2.0635, + "step": 1501 + }, + { + "epoch": 0.3, + "learning_rate": 0.0008146729672530574, + "loss": 2.165, + "step": 1502 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008144174681166383, + "loss": 2.1133, + "step": 1503 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008141618331074788, + "loss": 2.0732, + "step": 1504 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008139060623360494, + "loss": 2.0723, + "step": 1505 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008136501559128788, + "loss": 2.0342, + "step": 1506 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008133941139485551, + "loss": 2.0957, + "step": 1507 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008131379365537245, + "loss": 2.1494, + "step": 1508 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008128816238390916, + "loss": 2.1201, + "step": 1509 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008126251759154199, + "loss": 2.0811, + "step": 1510 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008123685928935312, + "loss": 2.0498, + "step": 1511 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008121118748843052, + "loss": 2.1074, + "step": 1512 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008118550219986807, + "loss": 2.0488, + "step": 1513 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008115980343476542, + "loss": 2.0947, + "step": 1514 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008113409120422808, + "loss": 2.1367, + "step": 1515 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008110836551936735, + "loss": 2.1436, + "step": 1516 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008108262639130037, + "loss": 2.1221, + "step": 1517 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008105687383115008, + "loss": 2.1787, + "step": 1518 + }, + { + "epoch": 0.31, + "learning_rate": 0.000810311078500452, + "loss": 2.0508, + "step": 1519 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008100532845912028, + "loss": 2.123, + "step": 1520 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008097953566951568, + "loss": 2.0, + "step": 1521 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008095372949237751, + "loss": 2.0605, + "step": 1522 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008092790993885768, + "loss": 2.1484, + "step": 1523 + }, + { + "epoch": 0.31, + "learning_rate": 0.000809020770201139, + "loss": 2.083, + "step": 1524 + }, + { + "epoch": 0.31, + "learning_rate": 0.000808762307473096, + "loss": 2.0918, + "step": 1525 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008085037113161406, + "loss": 2.2012, + "step": 1526 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008082449818420226, + "loss": 2.1006, + "step": 1527 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008079861191625497, + "loss": 2.0674, + "step": 1528 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008077271233895872, + "loss": 2.166, + "step": 1529 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008074679946350578, + "loss": 2.1387, + "step": 1530 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008072087330109418, + "loss": 2.082, + "step": 1531 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008069493386292768, + "loss": 2.1064, + "step": 1532 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008066898116021576, + "loss": 2.1895, + "step": 1533 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008064301520417367, + "loss": 2.1348, + "step": 1534 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008061703600602238, + "loss": 2.0879, + "step": 1535 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008059104357698854, + "loss": 2.1113, + "step": 1536 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008056503792830458, + "loss": 2.085, + "step": 1537 + }, + { + "epoch": 0.31, + "learning_rate": 0.000805390190712086, + "loss": 2.1113, + "step": 1538 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008051298701694441, + "loss": 2.1045, + "step": 1539 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008048694177676156, + "loss": 2.0674, + "step": 1540 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008046088336191524, + "loss": 2.0947, + "step": 1541 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008043481178366638, + "loss": 2.1592, + "step": 1542 + }, + { + "epoch": 0.31, + "learning_rate": 0.000804087270532816, + "loss": 2.1299, + "step": 1543 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008038262918203314, + "loss": 2.0566, + "step": 1544 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008035651818119902, + "loss": 2.1191, + "step": 1545 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008033039406206282, + "loss": 2.1084, + "step": 1546 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008030425683591391, + "loss": 2.1631, + "step": 1547 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008027810651404721, + "loss": 2.1279, + "step": 1548 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008025194310776338, + "loss": 2.0713, + "step": 1549 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008022576662836871, + "loss": 2.1289, + "step": 1550 + }, + { + "epoch": 0.31, + "learning_rate": 0.0008019957708717512, + "loss": 2.1133, + "step": 1551 + }, + { + "epoch": 0.31, + "learning_rate": 0.000801733744955002, + "loss": 2.0938, + "step": 1552 + }, + { + "epoch": 0.32, + "learning_rate": 0.0008014715886466716, + "loss": 2.0752, + "step": 1553 + }, + { + "epoch": 0.32, + "learning_rate": 0.0008012093020600486, + "loss": 2.0908, + "step": 1554 + }, + { + "epoch": 0.32, + "learning_rate": 0.0008009468853084775, + "loss": 2.1309, + "step": 1555 + }, + { + "epoch": 0.32, + "learning_rate": 0.0008006843385053601, + "loss": 2.0596, + "step": 1556 + }, + { + "epoch": 0.32, + "learning_rate": 0.0008004216617641532, + "loss": 2.1475, + "step": 1557 + }, + { + "epoch": 0.32, + "learning_rate": 0.0008001588551983703, + "loss": 2.0781, + "step": 1558 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007998959189215811, + "loss": 2.1504, + "step": 1559 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007996328530474108, + "loss": 2.1309, + "step": 1560 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007993696576895415, + "loss": 2.0029, + "step": 1561 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007991063329617106, + "loss": 2.1348, + "step": 1562 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007988428789777112, + "loss": 2.1797, + "step": 1563 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007985792958513931, + "loss": 2.0928, + "step": 1564 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007983155836966613, + "loss": 2.1494, + "step": 1565 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007980517426274765, + "loss": 2.1094, + "step": 1566 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007977877727578555, + "loss": 2.1523, + "step": 1567 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007975236742018704, + "loss": 2.1787, + "step": 1568 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007972594470736492, + "loss": 2.3047, + "step": 1569 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007969950914873753, + "loss": 2.1016, + "step": 1570 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007967306075572875, + "loss": 2.0703, + "step": 1571 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007964659953976805, + "loss": 2.0742, + "step": 1572 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007962012551229038, + "loss": 2.1689, + "step": 1573 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007959363868473625, + "loss": 2.0645, + "step": 1574 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007956713906855178, + "loss": 2.0664, + "step": 1575 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007954062667518845, + "loss": 2.1709, + "step": 1576 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007951410151610343, + "loss": 2.1484, + "step": 1577 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007948756360275929, + "loss": 2.0615, + "step": 1578 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007946101294662418, + "loss": 2.0762, + "step": 1579 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007943444955917173, + "loss": 2.1797, + "step": 1580 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007940787345188106, + "loss": 2.0518, + "step": 1581 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007938128463623682, + "loss": 2.123, + "step": 1582 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007935468312372911, + "loss": 2.0674, + "step": 1583 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007932806892585356, + "loss": 2.2012, + "step": 1584 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007930144205411125, + "loss": 2.1104, + "step": 1585 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007927480252000875, + "loss": 2.1816, + "step": 1586 + }, + { + "epoch": 0.32, + "learning_rate": 0.000792481503350581, + "loss": 2.1816, + "step": 1587 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007922148551077682, + "loss": 2.0469, + "step": 1588 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007919480805868784, + "loss": 2.0869, + "step": 1589 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007916811799031963, + "loss": 2.0576, + "step": 1590 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007914141531720607, + "loss": 2.1309, + "step": 1591 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007911470005088643, + "loss": 2.0908, + "step": 1592 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007908797220290553, + "loss": 2.1191, + "step": 1593 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007906123178481357, + "loss": 2.1621, + "step": 1594 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007903447880816617, + "loss": 2.1445, + "step": 1595 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007900771328452438, + "loss": 2.1006, + "step": 1596 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007898093522545471, + "loss": 2.0469, + "step": 1597 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007895414464252906, + "loss": 2.1309, + "step": 1598 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007892734154732474, + "loss": 2.0938, + "step": 1599 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007890052595142446, + "loss": 2.1348, + "step": 1600 + }, + { + "epoch": 0.32, + "learning_rate": 0.0007887369786641637, + "loss": 2.1113, + "step": 1601 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007884685730389397, + "loss": 2.0664, + "step": 1602 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007882000427545617, + "loss": 2.1396, + "step": 1603 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007879313879270729, + "loss": 2.1104, + "step": 1604 + }, + { + "epoch": 0.33, + "learning_rate": 0.00078766260867257, + "loss": 2.1328, + "step": 1605 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007873937051072036, + "loss": 2.0332, + "step": 1606 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007871246773471779, + "loss": 2.0508, + "step": 1607 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007868555255087509, + "loss": 2.0996, + "step": 1608 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007865862497082344, + "loss": 2.2383, + "step": 1609 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007863168500619934, + "loss": 2.0479, + "step": 1610 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007860473266864467, + "loss": 2.0674, + "step": 1611 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007857776796980663, + "loss": 2.1143, + "step": 1612 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007855079092133778, + "loss": 2.1396, + "step": 1613 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007852380153489602, + "loss": 2.1182, + "step": 1614 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007849679982214458, + "loss": 2.2148, + "step": 1615 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007846978579475201, + "loss": 2.0547, + "step": 1616 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007844275946439216, + "loss": 2.1914, + "step": 1617 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007841572084274427, + "loss": 2.085, + "step": 1618 + }, + { + "epoch": 0.33, + "learning_rate": 0.000783886699414928, + "loss": 2.168, + "step": 1619 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007836160677232758, + "loss": 2.1445, + "step": 1620 + }, + { + "epoch": 0.33, + "learning_rate": 0.000783345313469437, + "loss": 2.1338, + "step": 1621 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007830744367704159, + "loss": 1.9678, + "step": 1622 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007828034377432694, + "loss": 2.0273, + "step": 1623 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007825323165051072, + "loss": 2.0459, + "step": 1624 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007822610731730922, + "loss": 2.1182, + "step": 1625 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007819897078644396, + "loss": 2.167, + "step": 1626 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007817182206964177, + "loss": 2.1445, + "step": 1627 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007814466117863472, + "loss": 2.0576, + "step": 1628 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007811748812516012, + "loss": 2.124, + "step": 1629 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007809030292096064, + "loss": 2.1338, + "step": 1630 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007806310557778406, + "loss": 2.1562, + "step": 1631 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007803589610738351, + "loss": 2.0693, + "step": 1632 + }, + { + "epoch": 0.33, + "learning_rate": 0.000780086745215173, + "loss": 2.0889, + "step": 1633 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007798144083194903, + "loss": 2.1318, + "step": 1634 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007795419505044745, + "loss": 2.123, + "step": 1635 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007792693718878662, + "loss": 2.1162, + "step": 1636 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007789966725874577, + "loss": 2.1279, + "step": 1637 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007787238527210937, + "loss": 2.1006, + "step": 1638 + }, + { + "epoch": 0.33, + "learning_rate": 0.000778450912406671, + "loss": 2.0889, + "step": 1639 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007781778517621379, + "loss": 2.082, + "step": 1640 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007779046709054958, + "loss": 2.1289, + "step": 1641 + }, + { + "epoch": 0.33, + "learning_rate": 0.000777631369954797, + "loss": 2.1113, + "step": 1642 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007773579490281459, + "loss": 2.1426, + "step": 1643 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007770844082436996, + "loss": 2.1094, + "step": 1644 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007768107477196658, + "loss": 2.1133, + "step": 1645 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007765369675743046, + "loss": 2.0654, + "step": 1646 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007762630679259279, + "loss": 2.0791, + "step": 1647 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007759890488928987, + "loss": 2.1025, + "step": 1648 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007757149105936321, + "loss": 2.0957, + "step": 1649 + }, + { + "epoch": 0.33, + "learning_rate": 0.0007754406531465945, + "loss": 2.1465, + "step": 1650 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007751662766703038, + "loss": 2.0859, + "step": 1651 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007748917812833296, + "loss": 2.041, + "step": 1652 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007746171671042924, + "loss": 2.0693, + "step": 1653 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007743424342518643, + "loss": 2.0762, + "step": 1654 + }, + { + "epoch": 0.34, + "learning_rate": 0.000774067582844769, + "loss": 2.0693, + "step": 1655 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007737926130017808, + "loss": 2.0957, + "step": 1656 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007735175248417255, + "loss": 2.084, + "step": 1657 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007732423184834803, + "loss": 1.9834, + "step": 1658 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007729669940459729, + "loss": 2.1357, + "step": 1659 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007726915516481824, + "loss": 2.1328, + "step": 1660 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007724159914091388, + "loss": 2.082, + "step": 1661 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007721403134479234, + "loss": 2.0693, + "step": 1662 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007718645178836675, + "loss": 2.0986, + "step": 1663 + }, + { + "epoch": 0.34, + "learning_rate": 0.000771588604835554, + "loss": 2.0898, + "step": 1664 + }, + { + "epoch": 0.34, + "learning_rate": 0.000771312574422816, + "loss": 2.04, + "step": 1665 + }, + { + "epoch": 0.34, + "learning_rate": 0.000771036426764738, + "loss": 2.0801, + "step": 1666 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007707601619806547, + "loss": 2.0488, + "step": 1667 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007704837801899512, + "loss": 2.0205, + "step": 1668 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007702072815120637, + "loss": 2.1338, + "step": 1669 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007699306660664787, + "loss": 2.0557, + "step": 1670 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007696539339727332, + "loss": 2.1152, + "step": 1671 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007693770853504143, + "loss": 2.1191, + "step": 1672 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007691001203191597, + "loss": 2.0693, + "step": 1673 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007688230389986575, + "loss": 2.0723, + "step": 1674 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007685458415086459, + "loss": 2.1514, + "step": 1675 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007682685279689134, + "loss": 2.1104, + "step": 1676 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007679910984992988, + "loss": 2.0977, + "step": 1677 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007677135532196904, + "loss": 1.9971, + "step": 1678 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007674358922500273, + "loss": 2.1104, + "step": 1679 + }, + { + "epoch": 0.34, + "learning_rate": 0.000767158115710298, + "loss": 2.1299, + "step": 1680 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007668802237205412, + "loss": 2.0566, + "step": 1681 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007666022164008457, + "loss": 2.1494, + "step": 1682 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007663240938713497, + "loss": 2.0352, + "step": 1683 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007660458562522414, + "loss": 2.1094, + "step": 1684 + }, + { + "epoch": 0.34, + "learning_rate": 0.000765767503663759, + "loss": 2.0635, + "step": 1685 + }, + { + "epoch": 0.34, + "learning_rate": 0.00076548903622619, + "loss": 2.0957, + "step": 1686 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007652104540598712, + "loss": 2.0254, + "step": 1687 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007649317572851901, + "loss": 1.9375, + "step": 1688 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007646529460225827, + "loss": 2.0107, + "step": 1689 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007643740203925352, + "loss": 2.1836, + "step": 1690 + }, + { + "epoch": 0.34, + "learning_rate": 0.000764094980515582, + "loss": 2.1152, + "step": 1691 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007638158265123085, + "loss": 2.0732, + "step": 1692 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007635365585033487, + "loss": 2.1387, + "step": 1693 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007632571766093854, + "loss": 2.123, + "step": 1694 + }, + { + "epoch": 0.34, + "learning_rate": 0.000762977680951151, + "loss": 2.0664, + "step": 1695 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007626980716494274, + "loss": 2.0947, + "step": 1696 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007624183488250451, + "loss": 2.0918, + "step": 1697 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007621385125988839, + "loss": 2.1113, + "step": 1698 + }, + { + "epoch": 0.34, + "learning_rate": 0.0007618585630918726, + "loss": 2.1494, + "step": 1699 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007615785004249888, + "loss": 2.1543, + "step": 1700 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007612983247192593, + "loss": 2.1113, + "step": 1701 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007610180360957594, + "loss": 2.0693, + "step": 1702 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007607376346756137, + "loss": 2.0254, + "step": 1703 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007604571205799946, + "loss": 2.0752, + "step": 1704 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007601764939301241, + "loss": 2.0566, + "step": 1705 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007598957548472728, + "loss": 2.0977, + "step": 1706 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007596149034527593, + "loss": 2.1064, + "step": 1707 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007593339398679511, + "loss": 2.0449, + "step": 1708 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007590528642142642, + "loss": 2.0518, + "step": 1709 + }, + { + "epoch": 0.35, + "learning_rate": 0.000758771676613163, + "loss": 2.0654, + "step": 1710 + }, + { + "epoch": 0.35, + "learning_rate": 0.00075849037718616, + "loss": 2.1904, + "step": 1711 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007582089660548166, + "loss": 2.002, + "step": 1712 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007579274433407422, + "loss": 2.2383, + "step": 1713 + }, + { + "epoch": 0.35, + "learning_rate": 0.000757645809165594, + "loss": 2.0811, + "step": 1714 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007573640636510776, + "loss": 2.0898, + "step": 1715 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007570822069189475, + "loss": 2.1406, + "step": 1716 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007568002390910054, + "loss": 2.0625, + "step": 1717 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007565181602891009, + "loss": 2.1104, + "step": 1718 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007562359706351318, + "loss": 2.0557, + "step": 1719 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007559536702510443, + "loss": 2.2188, + "step": 1720 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007556712592588318, + "loss": 2.0957, + "step": 1721 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007553887377805358, + "loss": 2.0781, + "step": 1722 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007551061059382454, + "loss": 2.0752, + "step": 1723 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007548233638540975, + "loss": 2.0527, + "step": 1724 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007545405116502767, + "loss": 1.96, + "step": 1725 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007542575494490148, + "loss": 2.0166, + "step": 1726 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007539744773725916, + "loss": 2.0566, + "step": 1727 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007536912955433344, + "loss": 2.0723, + "step": 1728 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007534080040836173, + "loss": 2.1016, + "step": 1729 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007531246031158627, + "loss": 2.0791, + "step": 1730 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007528410927625396, + "loss": 2.0488, + "step": 1731 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007525574731461642, + "loss": 2.0889, + "step": 1732 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007522737443893007, + "loss": 2.0547, + "step": 1733 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007519899066145597, + "loss": 2.0879, + "step": 1734 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007517059599445995, + "loss": 2.0928, + "step": 1735 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007514219045021246, + "loss": 2.1699, + "step": 1736 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007511377404098876, + "loss": 2.1309, + "step": 1737 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007508534677906874, + "loss": 2.0469, + "step": 1738 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007505690867673697, + "loss": 2.1045, + "step": 1739 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007502845974628274, + "loss": 2.1006, + "step": 1740 + }, + { + "epoch": 0.35, + "learning_rate": 0.00075, + "loss": 2.0566, + "step": 1741 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007497152945018739, + "loss": 2.0264, + "step": 1742 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007494304810914819, + "loss": 2.0664, + "step": 1743 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007491455598919037, + "loss": 2.1064, + "step": 1744 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007488605310262655, + "loss": 2.1621, + "step": 1745 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007485753946177401, + "loss": 2.1299, + "step": 1746 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007482901507895463, + "loss": 2.0098, + "step": 1747 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007480047996649502, + "loss": 2.0449, + "step": 1748 + }, + { + "epoch": 0.35, + "learning_rate": 0.0007477193413672637, + "loss": 2.0518, + "step": 1749 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007474337760198447, + "loss": 2.0479, + "step": 1750 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007471481037460981, + "loss": 2.1396, + "step": 1751 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007468623246694746, + "loss": 2.0381, + "step": 1752 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007465764389134711, + "loss": 2.0518, + "step": 1753 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007462904466016305, + "loss": 2.0586, + "step": 1754 + }, + { + "epoch": 0.36, + "learning_rate": 0.000746004347857542, + "loss": 2.0938, + "step": 1755 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007457181428048405, + "loss": 2.0615, + "step": 1756 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007454318315672073, + "loss": 2.1436, + "step": 1757 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007451454142683689, + "loss": 2.1289, + "step": 1758 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007448588910320983, + "loss": 2.0459, + "step": 1759 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007445722619822137, + "loss": 2.124, + "step": 1760 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007442855272425797, + "loss": 2.0381, + "step": 1761 + }, + { + "epoch": 0.36, + "learning_rate": 0.000743998686937106, + "loss": 2.1562, + "step": 1762 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007437117411897481, + "loss": 2.1152, + "step": 1763 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007434246901245069, + "loss": 2.1475, + "step": 1764 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007431375338654295, + "loss": 2.123, + "step": 1765 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007428502725366074, + "loss": 2.1025, + "step": 1766 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007425629062621784, + "loss": 2.1201, + "step": 1767 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007422754351663251, + "loss": 2.0996, + "step": 1768 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007419878593732757, + "loss": 2.042, + "step": 1769 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007417001790073035, + "loss": 2.1074, + "step": 1770 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007414123941927272, + "loss": 2.1172, + "step": 1771 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007411245050539103, + "loss": 2.0293, + "step": 1772 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007408365117152617, + "loss": 2.127, + "step": 1773 + }, + { + "epoch": 0.36, + "learning_rate": 0.000740548414301235, + "loss": 2.085, + "step": 1774 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007402602129363288, + "loss": 2.1309, + "step": 1775 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007399719077450875, + "loss": 2.0889, + "step": 1776 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007396834988520988, + "loss": 2.0449, + "step": 1777 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007393949863819966, + "loss": 2.1748, + "step": 1778 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007391063704594589, + "loss": 2.0957, + "step": 1779 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007388176512092084, + "loss": 2.084, + "step": 1780 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007385288287560128, + "loss": 2.0576, + "step": 1781 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007382399032246841, + "loss": 2.1055, + "step": 1782 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007379508747400787, + "loss": 2.0488, + "step": 1783 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007376617434270981, + "loss": 2.0781, + "step": 1784 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007373725094106875, + "loss": 2.1387, + "step": 1785 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007370831728158371, + "loss": 2.0586, + "step": 1786 + }, + { + "epoch": 0.36, + "learning_rate": 0.000736793733767581, + "loss": 2.0557, + "step": 1787 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007365041923909978, + "loss": 2.0342, + "step": 1788 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007362145488112102, + "loss": 2.0898, + "step": 1789 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007359248031533852, + "loss": 2.1289, + "step": 1790 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007356349555427338, + "loss": 2.0635, + "step": 1791 + }, + { + "epoch": 0.36, + "learning_rate": 0.000735345006104511, + "loss": 2.167, + "step": 1792 + }, + { + "epoch": 0.36, + "learning_rate": 0.000735054954964016, + "loss": 2.0732, + "step": 1793 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007347648022465919, + "loss": 2.1084, + "step": 1794 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007344745480776256, + "loss": 2.1504, + "step": 1795 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007341841925825478, + "loss": 2.1904, + "step": 1796 + }, + { + "epoch": 0.36, + "learning_rate": 0.0007338937358868332, + "loss": 1.9873, + "step": 1797 + }, + { + "epoch": 0.36, + "learning_rate": 0.000733603178116, + "loss": 2.0586, + "step": 1798 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007333125193956101, + "loss": 2.0654, + "step": 1799 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007330217598512695, + "loss": 2.1152, + "step": 1800 + }, + { + "epoch": 0.37, + "learning_rate": 0.000732730899608627, + "loss": 2.0352, + "step": 1801 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007324399387933753, + "loss": 2.001, + "step": 1802 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007321488775312506, + "loss": 2.1543, + "step": 1803 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007318577159480327, + "loss": 2.1182, + "step": 1804 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007315664541695441, + "loss": 2.0508, + "step": 1805 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007312750923216514, + "loss": 2.0693, + "step": 1806 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007309836305302637, + "loss": 2.0947, + "step": 1807 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007306920689213343, + "loss": 2.1123, + "step": 1808 + }, + { + "epoch": 0.37, + "learning_rate": 0.000730400407620858, + "loss": 2.0684, + "step": 1809 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007301086467548744, + "loss": 2.0381, + "step": 1810 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007298167864494652, + "loss": 2.1025, + "step": 1811 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007295248268307553, + "loss": 2.0518, + "step": 1812 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007292327680249122, + "loss": 2.085, + "step": 1813 + }, + { + "epoch": 0.37, + "learning_rate": 0.000728940610158147, + "loss": 2.0, + "step": 1814 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007286483533567127, + "loss": 1.9971, + "step": 1815 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007283559977469059, + "loss": 2.1104, + "step": 1816 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007280635434550652, + "loss": 2.0518, + "step": 1817 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007277709906075722, + "loss": 2.0225, + "step": 1818 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007274783393308513, + "loss": 2.0684, + "step": 1819 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007271855897513687, + "loss": 1.9697, + "step": 1820 + }, + { + "epoch": 0.37, + "learning_rate": 0.000726892741995634, + "loss": 2.0303, + "step": 1821 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007265997961901987, + "loss": 2.0547, + "step": 1822 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007263067524616565, + "loss": 2.0576, + "step": 1823 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007260136109366439, + "loss": 2.0068, + "step": 1824 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007257203717418393, + "loss": 2.0332, + "step": 1825 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007254270350039633, + "loss": 2.0518, + "step": 1826 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007251336008497793, + "loss": 2.0615, + "step": 1827 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007248400694060917, + "loss": 2.0742, + "step": 1828 + }, + { + "epoch": 0.37, + "learning_rate": 0.000724546440799748, + "loss": 2.1357, + "step": 1829 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007242527151576368, + "loss": 2.0615, + "step": 1830 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007239588926066893, + "loss": 2.0439, + "step": 1831 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007236649732738784, + "loss": 2.166, + "step": 1832 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007233709572862187, + "loss": 2.0605, + "step": 1833 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007230768447707663, + "loss": 2.1035, + "step": 1834 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007227826358546199, + "loss": 2.1084, + "step": 1835 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007224883306649188, + "loss": 2.0713, + "step": 1836 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007221939293288444, + "loss": 2.1494, + "step": 1837 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007218994319736201, + "loss": 1.9648, + "step": 1838 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007216048387265099, + "loss": 1.9951, + "step": 1839 + }, + { + "epoch": 0.37, + "learning_rate": 0.00072131014971482, + "loss": 2.0791, + "step": 1840 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007210153650658974, + "loss": 2.0635, + "step": 1841 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007207204849071308, + "loss": 2.0254, + "step": 1842 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007204255093659501, + "loss": 2.0264, + "step": 1843 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007201304385698263, + "loss": 2.0703, + "step": 1844 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007198352726462717, + "loss": 2.0283, + "step": 1845 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007195400117228397, + "loss": 2.0342, + "step": 1846 + }, + { + "epoch": 0.37, + "learning_rate": 0.0007192446559271246, + "loss": 2.1357, + "step": 1847 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007189492053867618, + "loss": 1.9854, + "step": 1848 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007186536602294277, + "loss": 2.083, + "step": 1849 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007183580205828396, + "loss": 2.001, + "step": 1850 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007180622865747555, + "loss": 2.0576, + "step": 1851 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007177664583329742, + "loss": 2.1025, + "step": 1852 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007174705359853355, + "loss": 2.1592, + "step": 1853 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007171745196597193, + "loss": 2.0293, + "step": 1854 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007168784094840467, + "loss": 2.0938, + "step": 1855 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007165822055862791, + "loss": 2.1504, + "step": 1856 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007162859080944182, + "loss": 2.0195, + "step": 1857 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007159895171365065, + "loss": 2.0332, + "step": 1858 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007156930328406268, + "loss": 2.1592, + "step": 1859 + }, + { + "epoch": 0.38, + "learning_rate": 0.000715396455334902, + "loss": 2.0986, + "step": 1860 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007150997847474956, + "loss": 2.0615, + "step": 1861 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007148030212066112, + "loss": 2.1289, + "step": 1862 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007145061648404923, + "loss": 2.1758, + "step": 1863 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007142092157774232, + "loss": 2.0459, + "step": 1864 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007139121741457275, + "loss": 2.0635, + "step": 1865 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007136150400737691, + "loss": 2.0273, + "step": 1866 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007133178136899522, + "loss": 2.084, + "step": 1867 + }, + { + "epoch": 0.38, + "learning_rate": 0.00071302049512272, + "loss": 2.1279, + "step": 1868 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007127230845005568, + "loss": 1.9531, + "step": 1869 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007124255819519854, + "loss": 2.0361, + "step": 1870 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007121279876055693, + "loss": 2.041, + "step": 1871 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007118303015899109, + "loss": 2.0449, + "step": 1872 + }, + { + "epoch": 0.38, + "learning_rate": 0.000711532524033653, + "loss": 2.0762, + "step": 1873 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007112346550654774, + "loss": 1.9893, + "step": 1874 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007109366948141052, + "loss": 2.0596, + "step": 1875 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007106386434082979, + "loss": 2.0498, + "step": 1876 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007103405009768553, + "loss": 2.1025, + "step": 1877 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007100422676486174, + "loss": 2.0742, + "step": 1878 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007097439435524627, + "loss": 2.0654, + "step": 1879 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007094455288173097, + "loss": 2.1318, + "step": 1880 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007091470235721155, + "loss": 2.0928, + "step": 1881 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007088484279458766, + "loss": 2.0352, + "step": 1882 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007085497420676285, + "loss": 2.0928, + "step": 1883 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007082509660664456, + "loss": 2.1504, + "step": 1884 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007079521000714412, + "loss": 2.0947, + "step": 1885 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007076531442117679, + "loss": 2.0488, + "step": 1886 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007073540986166166, + "loss": 2.1133, + "step": 1887 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007070549634152172, + "loss": 2.0869, + "step": 1888 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007067557387368386, + "loss": 2.0947, + "step": 1889 + }, + { + "epoch": 0.38, + "learning_rate": 0.000706456424710788, + "loss": 2.042, + "step": 1890 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007061570214664112, + "loss": 2.0234, + "step": 1891 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007058575291330928, + "loss": 2.0615, + "step": 1892 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007055579478402556, + "loss": 2.1006, + "step": 1893 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007052582777173613, + "loss": 2.0518, + "step": 1894 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007049585188939095, + "loss": 2.0801, + "step": 1895 + }, + { + "epoch": 0.38, + "learning_rate": 0.0007046586714994383, + "loss": 2.0889, + "step": 1896 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007043587356635242, + "loss": 2.0762, + "step": 1897 + }, + { + "epoch": 0.39, + "learning_rate": 0.000704058711515782, + "loss": 2.1025, + "step": 1898 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007037585991858642, + "loss": 2.0547, + "step": 1899 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007034583988034618, + "loss": 2.1924, + "step": 1900 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007031581104983038, + "loss": 2.0889, + "step": 1901 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007028577344001573, + "loss": 2.0977, + "step": 1902 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007025572706388268, + "loss": 2.1211, + "step": 1903 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007022567193441553, + "loss": 2.041, + "step": 1904 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007019560806460235, + "loss": 2.0967, + "step": 1905 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007016553546743495, + "loss": 2.0605, + "step": 1906 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007013545415590896, + "loss": 2.1602, + "step": 1907 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007010536414302375, + "loss": 1.9336, + "step": 1908 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007007526544178243, + "loss": 1.9941, + "step": 1909 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007004515806519193, + "loss": 2.0254, + "step": 1910 + }, + { + "epoch": 0.39, + "learning_rate": 0.0007001504202626285, + "loss": 2.0586, + "step": 1911 + }, + { + "epoch": 0.39, + "learning_rate": 0.000699849173380096, + "loss": 2.0889, + "step": 1912 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006995478401345028, + "loss": 2.082, + "step": 1913 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006992464206560675, + "loss": 2.125, + "step": 1914 + }, + { + "epoch": 0.39, + "learning_rate": 0.000698944915075046, + "loss": 2.0879, + "step": 1915 + }, + { + "epoch": 0.39, + "learning_rate": 0.000698643323521731, + "loss": 2.0439, + "step": 1916 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006983416461264527, + "loss": 2.0264, + "step": 1917 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006980398830195785, + "loss": 2.1162, + "step": 1918 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006977380343315125, + "loss": 2.0264, + "step": 1919 + }, + { + "epoch": 0.39, + "learning_rate": 0.000697436100192696, + "loss": 2.0156, + "step": 1920 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006971340807336072, + "loss": 2.0205, + "step": 1921 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006968319760847613, + "loss": 2.0381, + "step": 1922 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006965297863767097, + "loss": 1.9639, + "step": 1923 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006962275117400415, + "loss": 2.0625, + "step": 1924 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006959251523053819, + "loss": 2.127, + "step": 1925 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006956227082033929, + "loss": 2.0625, + "step": 1926 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006953201795647728, + "loss": 2.0996, + "step": 1927 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006950175665202568, + "loss": 2.0781, + "step": 1928 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006947148692006169, + "loss": 2.0713, + "step": 1929 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006944120877366604, + "loss": 2.0605, + "step": 1930 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006941092222592319, + "loss": 1.9629, + "step": 1931 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006938062728992123, + "loss": 2.0664, + "step": 1932 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006935032397875182, + "loss": 1.9453, + "step": 1933 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006932001230551028, + "loss": 2.0332, + "step": 1934 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006928969228329554, + "loss": 2.1221, + "step": 1935 + }, + { + "epoch": 0.39, + "learning_rate": 0.000692593639252101, + "loss": 2.1445, + "step": 1936 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006922902724436016, + "loss": 2.0703, + "step": 1937 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006919868225385536, + "loss": 2.1719, + "step": 1938 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006916832896680908, + "loss": 2.0361, + "step": 1939 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006913796739633822, + "loss": 2.0605, + "step": 1940 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006910759755556325, + "loss": 2.0449, + "step": 1941 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006907721945760823, + "loss": 1.9736, + "step": 1942 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006904683311560081, + "loss": 2.0391, + "step": 1943 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006901643854267214, + "loss": 2.0176, + "step": 1944 + }, + { + "epoch": 0.39, + "learning_rate": 0.0006898603575195701, + "loss": 2.0771, + "step": 1945 + }, + { + "epoch": 0.39, + "learning_rate": 0.000689556247565937, + "loss": 2.0186, + "step": 1946 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006892520556972403, + "loss": 2.0186, + "step": 1947 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006889477820449342, + "loss": 2.1201, + "step": 1948 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006886434267405078, + "loss": 2.0928, + "step": 1949 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006883389899154856, + "loss": 2.1406, + "step": 1950 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006880344717014271, + "loss": 1.9775, + "step": 1951 + }, + { + "epoch": 0.4, + "learning_rate": 0.000687729872229927, + "loss": 2.0918, + "step": 1952 + }, + { + "epoch": 0.4, + "learning_rate": 0.000687425191632616, + "loss": 2.0576, + "step": 1953 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006871204300411584, + "loss": 2.1602, + "step": 1954 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006868155875872546, + "loss": 2.2031, + "step": 1955 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006865106644026396, + "loss": 2.1123, + "step": 1956 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006862056606190829, + "loss": 2.0195, + "step": 1957 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006859005763683899, + "loss": 2.0977, + "step": 1958 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006855954117823993, + "loss": 2.0977, + "step": 1959 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006852901669929856, + "loss": 2.0986, + "step": 1960 + }, + { + "epoch": 0.4, + "learning_rate": 0.000684984842132058, + "loss": 2.1445, + "step": 1961 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006846794373315593, + "loss": 2.0752, + "step": 1962 + }, + { + "epoch": 0.4, + "learning_rate": 0.000684373952723468, + "loss": 2.0508, + "step": 1963 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006840683884397965, + "loss": 2.0791, + "step": 1964 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006837627446125916, + "loss": 2.0693, + "step": 1965 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006834570213739346, + "loss": 2.0264, + "step": 1966 + }, + { + "epoch": 0.4, + "learning_rate": 0.000683151218855941, + "loss": 2.041, + "step": 1967 + }, + { + "epoch": 0.4, + "learning_rate": 0.000682845337190761, + "loss": 2.1426, + "step": 1968 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006825393765105782, + "loss": 2.001, + "step": 1969 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006822333369476113, + "loss": 2.0664, + "step": 1970 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006819272186341123, + "loss": 2.1562, + "step": 1971 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006816210217023675, + "loss": 2.1113, + "step": 1972 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006813147462846974, + "loss": 2.124, + "step": 1973 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006810083925134561, + "loss": 1.9893, + "step": 1974 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006807019605210319, + "loss": 1.9541, + "step": 1975 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006803954504398462, + "loss": 1.9932, + "step": 1976 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006800888624023553, + "loss": 2.0771, + "step": 1977 + }, + { + "epoch": 0.4, + "learning_rate": 0.000679782196541048, + "loss": 2.1074, + "step": 1978 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006794754529884473, + "loss": 2.0146, + "step": 1979 + }, + { + "epoch": 0.4, + "learning_rate": 0.00067916863187711, + "loss": 2.0332, + "step": 1980 + }, + { + "epoch": 0.4, + "learning_rate": 0.000678861733339626, + "loss": 2.0576, + "step": 1981 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006785547575086187, + "loss": 2.0312, + "step": 1982 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006782477045167451, + "loss": 2.0137, + "step": 1983 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006779405744966955, + "loss": 2.083, + "step": 1984 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006776333675811934, + "loss": 2.085, + "step": 1985 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006773260839029955, + "loss": 2.0908, + "step": 1986 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006770187235948916, + "loss": 2.0312, + "step": 1987 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006767112867897051, + "loss": 2.082, + "step": 1988 + }, + { + "epoch": 0.4, + "learning_rate": 0.000676403773620292, + "loss": 2.0664, + "step": 1989 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006760961842195413, + "loss": 2.0312, + "step": 1990 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006757885187203748, + "loss": 2.0859, + "step": 1991 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006754807772557479, + "loss": 2.04, + "step": 1992 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006751729599586483, + "loss": 2.0674, + "step": 1993 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006748650669620964, + "loss": 2.0254, + "step": 1994 + }, + { + "epoch": 0.4, + "learning_rate": 0.0006745570983991454, + "loss": 2.0488, + "step": 1995 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006742490544028814, + "loss": 2.1641, + "step": 1996 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006739409351064226, + "loss": 1.9443, + "step": 1997 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006736327406429202, + "loss": 2.0371, + "step": 1998 + }, + { + "epoch": 0.41, + "learning_rate": 0.000673324471145558, + "loss": 2.0449, + "step": 1999 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006730161267475515, + "loss": 2.1338, + "step": 2000 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006727077075821493, + "loss": 1.9824, + "step": 2001 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006723992137826319, + "loss": 2.042, + "step": 2002 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006720906454823121, + "loss": 2.1094, + "step": 2003 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006717820028145352, + "loss": 2.0693, + "step": 2004 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006714732859126782, + "loss": 2.0605, + "step": 2005 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006711644949101505, + "loss": 2.0225, + "step": 2006 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006708556299403935, + "loss": 2.0635, + "step": 2007 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006705466911368803, + "loss": 2.042, + "step": 2008 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006702376786331164, + "loss": 2.043, + "step": 2009 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006699285925626383, + "loss": 2.0576, + "step": 2010 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006696194330590151, + "loss": 2.0928, + "step": 2011 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006693102002558475, + "loss": 2.0303, + "step": 2012 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006690008942867677, + "loss": 2.0332, + "step": 2013 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006686915152854393, + "loss": 2.0342, + "step": 2014 + }, + { + "epoch": 0.41, + "learning_rate": 0.000668382063385558, + "loss": 2.0859, + "step": 2015 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006680725387208505, + "loss": 2.0312, + "step": 2016 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006677629414250753, + "loss": 1.9346, + "step": 2017 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006674532716320219, + "loss": 2.1807, + "step": 2018 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006671435294755118, + "loss": 2.0371, + "step": 2019 + }, + { + "epoch": 0.41, + "learning_rate": 0.000666833715089397, + "loss": 2.0703, + "step": 2020 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006665238286075612, + "loss": 2.1387, + "step": 2021 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006662138701639191, + "loss": 2.04, + "step": 2022 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006659038398924165, + "loss": 2.1367, + "step": 2023 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006655937379270303, + "loss": 2.0293, + "step": 2024 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006652835644017682, + "loss": 2.1064, + "step": 2025 + }, + { + "epoch": 0.41, + "learning_rate": 0.000664973319450669, + "loss": 2.1016, + "step": 2026 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006646630032078024, + "loss": 2.0469, + "step": 2027 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006643526158072688, + "loss": 2.0596, + "step": 2028 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006640421573831994, + "loss": 2.1494, + "step": 2029 + }, + { + "epoch": 0.41, + "learning_rate": 0.000663731628069756, + "loss": 2.0723, + "step": 2030 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006634210280011312, + "loss": 2.0898, + "step": 2031 + }, + { + "epoch": 0.41, + "learning_rate": 0.000663110357311548, + "loss": 2.0918, + "step": 2032 + }, + { + "epoch": 0.41, + "learning_rate": 0.00066279961613526, + "loss": 2.0254, + "step": 2033 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006624888046065516, + "loss": 2.0693, + "step": 2034 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006621779228597368, + "loss": 2.0244, + "step": 2035 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006618669710291606, + "loss": 2.0576, + "step": 2036 + }, + { + "epoch": 0.41, + "learning_rate": 0.000661555949249198, + "loss": 2.1299, + "step": 2037 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006612448576542544, + "loss": 2.1006, + "step": 2038 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006609336963787655, + "loss": 2.043, + "step": 2039 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006606224655571966, + "loss": 2.0625, + "step": 2040 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006603111653240433, + "loss": 2.0566, + "step": 2041 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006599997958138315, + "loss": 2.1348, + "step": 2042 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006596883571611168, + "loss": 2.0859, + "step": 2043 + }, + { + "epoch": 0.41, + "learning_rate": 0.0006593768495004848, + "loss": 2.0352, + "step": 2044 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006590652729665503, + "loss": 2.127, + "step": 2045 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006587536276939585, + "loss": 2.0312, + "step": 2046 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006584419138173845, + "loss": 1.9766, + "step": 2047 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006581301314715325, + "loss": 2.0508, + "step": 2048 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006578182807911363, + "loss": 2.0371, + "step": 2049 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006575063619109599, + "loss": 2.0537, + "step": 2050 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006571943749657959, + "loss": 2.0889, + "step": 2051 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006568823200904667, + "loss": 2.0166, + "step": 2052 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006565701974198244, + "loss": 2.0801, + "step": 2053 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006562580070887498, + "loss": 2.0078, + "step": 2054 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006559457492321533, + "loss": 2.0781, + "step": 2055 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006556334239849743, + "loss": 1.9902, + "step": 2056 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006553210314821814, + "loss": 1.9727, + "step": 2057 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006550085718587724, + "loss": 2.084, + "step": 2058 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006546960452497738, + "loss": 1.9941, + "step": 2059 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006543834517902415, + "loss": 1.9961, + "step": 2060 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006540707916152598, + "loss": 2.0566, + "step": 2061 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006537580648599419, + "loss": 2.0879, + "step": 2062 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006534452716594301, + "loss": 2.0918, + "step": 2063 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006531324121488954, + "loss": 2.0381, + "step": 2064 + }, + { + "epoch": 0.42, + "learning_rate": 0.000652819486463537, + "loss": 2.0586, + "step": 2065 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006525064947385832, + "loss": 2.0303, + "step": 2066 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006521934371092901, + "loss": 2.0947, + "step": 2067 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006518803137109437, + "loss": 2.0801, + "step": 2068 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006515671246788567, + "loss": 2.0625, + "step": 2069 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006512538701483712, + "loss": 1.9912, + "step": 2070 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006509405502548578, + "loss": 2.0078, + "step": 2071 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006506271651337144, + "loss": 2.0127, + "step": 2072 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006503137149203679, + "loss": 2.0215, + "step": 2073 + }, + { + "epoch": 0.42, + "learning_rate": 0.000650000199750273, + "loss": 2.1064, + "step": 2074 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006496866197589125, + "loss": 2.0947, + "step": 2075 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006493729750817974, + "loss": 2.0674, + "step": 2076 + }, + { + "epoch": 0.42, + "learning_rate": 0.000649059265854466, + "loss": 2.1055, + "step": 2077 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006487454922124854, + "loss": 2.0371, + "step": 2078 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006484316542914502, + "loss": 1.9561, + "step": 2079 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006481177522269824, + "loss": 2.0049, + "step": 2080 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006478037861547321, + "loss": 2.1172, + "step": 2081 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006474897562103771, + "loss": 2.0713, + "step": 2082 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006471756625296225, + "loss": 2.0449, + "step": 2083 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006468615052482011, + "loss": 2.0479, + "step": 2084 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006465472845018735, + "loss": 2.0879, + "step": 2085 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006462330004264272, + "loss": 2.0762, + "step": 2086 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006459186531576771, + "loss": 2.167, + "step": 2087 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006456042428314661, + "loss": 2.0244, + "step": 2088 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006452897695836635, + "loss": 1.9688, + "step": 2089 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006449752335501662, + "loss": 2.0557, + "step": 2090 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006446606348668982, + "loss": 2.0137, + "step": 2091 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006443459736698105, + "loss": 2.1084, + "step": 2092 + }, + { + "epoch": 0.42, + "learning_rate": 0.0006440312500948814, + "loss": 2.0898, + "step": 2093 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006437164642781155, + "loss": 2.0107, + "step": 2094 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006434016163555452, + "loss": 2.0244, + "step": 2095 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006430867064632289, + "loss": 2.0938, + "step": 2096 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006427717347372523, + "loss": 1.9639, + "step": 2097 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006424567013137276, + "loss": 2.1016, + "step": 2098 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006421416063287937, + "loss": 1.9219, + "step": 2099 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006418264499186162, + "loss": 2.0166, + "step": 2100 + }, + { + "epoch": 0.43, + "learning_rate": 0.000641511232219387, + "loss": 2.1084, + "step": 2101 + }, + { + "epoch": 0.43, + "learning_rate": 0.000641195953367325, + "loss": 1.9727, + "step": 2102 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006408806134986749, + "loss": 2.0195, + "step": 2103 + }, + { + "epoch": 0.43, + "learning_rate": 0.000640565212749708, + "loss": 2.0039, + "step": 2104 + }, + { + "epoch": 0.43, + "learning_rate": 0.000640249751256722, + "loss": 2.0352, + "step": 2105 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006399342291560411, + "loss": 2.0576, + "step": 2106 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006396186465840152, + "loss": 2.041, + "step": 2107 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006393030036770203, + "loss": 1.9531, + "step": 2108 + }, + { + "epoch": 0.43, + "learning_rate": 0.000638987300571459, + "loss": 2.0312, + "step": 2109 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006386715374037595, + "loss": 2.0303, + "step": 2110 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006383557143103762, + "loss": 2.1758, + "step": 2111 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006380398314277889, + "loss": 2.0615, + "step": 2112 + }, + { + "epoch": 0.43, + "learning_rate": 0.000637723888892504, + "loss": 2.0049, + "step": 2113 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006374078868410534, + "loss": 2.0635, + "step": 2114 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006370918254099939, + "loss": 2.0605, + "step": 2115 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006367757047359093, + "loss": 2.0273, + "step": 2116 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006364595249554084, + "loss": 2.0703, + "step": 2117 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006361432862051251, + "loss": 2.1406, + "step": 2118 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006358269886217194, + "loss": 2.0664, + "step": 2119 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006355106323418766, + "loss": 1.9521, + "step": 2120 + }, + { + "epoch": 0.43, + "learning_rate": 0.000635194217502307, + "loss": 2.0654, + "step": 2121 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006348777442397467, + "loss": 2.043, + "step": 2122 + }, + { + "epoch": 0.43, + "learning_rate": 0.000634561212690957, + "loss": 2.0283, + "step": 2123 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006342446229927239, + "loss": 2.0928, + "step": 2124 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006339279752818591, + "loss": 2.0771, + "step": 2125 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006336112696951988, + "loss": 2.0312, + "step": 2126 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006332945063696048, + "loss": 2.0264, + "step": 2127 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006329776854419636, + "loss": 2.0322, + "step": 2128 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006326608070491864, + "loss": 2.0967, + "step": 2129 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006323438713282095, + "loss": 2.1104, + "step": 2130 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006320268784159939, + "loss": 2.0752, + "step": 2131 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006317098284495251, + "loss": 2.0693, + "step": 2132 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006313927215658138, + "loss": 2.0908, + "step": 2133 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006310755579018948, + "loss": 2.0508, + "step": 2134 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006307583375948273, + "loss": 2.0117, + "step": 2135 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006304410607816957, + "loss": 2.1191, + "step": 2136 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006301237275996082, + "loss": 2.0615, + "step": 2137 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006298063381856977, + "loss": 2.0254, + "step": 2138 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006294888926771208, + "loss": 2.0605, + "step": 2139 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006291713912110591, + "loss": 2.0713, + "step": 2140 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006288538339247182, + "loss": 2.0488, + "step": 2141 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006285362209553273, + "loss": 1.959, + "step": 2142 + }, + { + "epoch": 0.43, + "learning_rate": 0.0006282185524401404, + "loss": 2.0566, + "step": 2143 + }, + { + "epoch": 0.44, + "learning_rate": 0.000627900828516435, + "loss": 2.0479, + "step": 2144 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006275830493215127, + "loss": 2.0459, + "step": 2145 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006272652149926989, + "loss": 2.0781, + "step": 2146 + }, + { + "epoch": 0.44, + "learning_rate": 0.000626947325667343, + "loss": 1.9443, + "step": 2147 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006266293814828181, + "loss": 2.04, + "step": 2148 + }, + { + "epoch": 0.44, + "learning_rate": 0.000626311382576521, + "loss": 1.9629, + "step": 2149 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006259933290858719, + "loss": 2.0479, + "step": 2150 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006256752211483151, + "loss": 2.0137, + "step": 2151 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006253570589013182, + "loss": 2.0137, + "step": 2152 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006250388424823719, + "loss": 2.0527, + "step": 2153 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006247205720289907, + "loss": 2.0859, + "step": 2154 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006244022476787124, + "loss": 2.0469, + "step": 2155 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006240838695690983, + "loss": 1.9951, + "step": 2156 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006237654378377324, + "loss": 2.0527, + "step": 2157 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006234469526222223, + "loss": 2.0322, + "step": 2158 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006231284140601985, + "loss": 2.0732, + "step": 2159 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006228098222893148, + "loss": 2.0059, + "step": 2160 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006224911774472476, + "loss": 1.9941, + "step": 2161 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006221724796716966, + "loss": 2.0146, + "step": 2162 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006218537291003842, + "loss": 2.1162, + "step": 2163 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006215349258710558, + "loss": 2.0537, + "step": 2164 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006212160701214793, + "loss": 1.9971, + "step": 2165 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006208971619894452, + "loss": 1.9971, + "step": 2166 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006205782016127672, + "loss": 2.0068, + "step": 2167 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006202591891292809, + "loss": 2.0586, + "step": 2168 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006199401246768449, + "loss": 2.0732, + "step": 2169 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006196210083933403, + "loss": 2.0762, + "step": 2170 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006193018404166698, + "loss": 2.0547, + "step": 2171 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006189826208847596, + "loss": 2.0596, + "step": 2172 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006186633499355575, + "loss": 2.0479, + "step": 2173 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006183440277070334, + "loss": 1.9805, + "step": 2174 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006180246543371797, + "loss": 1.9707, + "step": 2175 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006177052299640109, + "loss": 2.0566, + "step": 2176 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006173857547255633, + "loss": 2.0293, + "step": 2177 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006170662287598955, + "loss": 1.9697, + "step": 2178 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006167466522050875, + "loss": 2.082, + "step": 2179 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006164270251992418, + "loss": 1.9033, + "step": 2180 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006161073478804822, + "loss": 2.0801, + "step": 2181 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006157876203869546, + "loss": 2.0635, + "step": 2182 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006154678428568262, + "loss": 2.0439, + "step": 2183 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006151480154282865, + "loss": 2.041, + "step": 2184 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006148281382395455, + "loss": 2.0156, + "step": 2185 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006145082114288357, + "loss": 1.9707, + "step": 2186 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006141882351344105, + "loss": 2.0918, + "step": 2187 + }, + { + "epoch": 0.44, + "learning_rate": 0.000613868209494545, + "loss": 1.9482, + "step": 2188 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006135481346475352, + "loss": 1.9824, + "step": 2189 + }, + { + "epoch": 0.44, + "learning_rate": 0.000613228010731699, + "loss": 2.0508, + "step": 2190 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006129078378853748, + "loss": 2.0449, + "step": 2191 + }, + { + "epoch": 0.44, + "learning_rate": 0.0006125876162469226, + "loss": 2.1182, + "step": 2192 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006122673459547233, + "loss": 1.9883, + "step": 2193 + }, + { + "epoch": 0.45, + "learning_rate": 0.000611947027147179, + "loss": 2.0264, + "step": 2194 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006116266599627126, + "loss": 2.0527, + "step": 2195 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006113062445397679, + "loss": 2.1123, + "step": 2196 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006109857810168098, + "loss": 2.0205, + "step": 2197 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006106652695323236, + "loss": 2.0039, + "step": 2198 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006103447102248153, + "loss": 2.0029, + "step": 2199 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006100241032328124, + "loss": 1.9961, + "step": 2200 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006097034486948618, + "loss": 2.0449, + "step": 2201 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006093827467495319, + "loss": 2.0781, + "step": 2202 + }, + { + "epoch": 0.45, + "learning_rate": 0.000609061997535411, + "loss": 2.0381, + "step": 2203 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006087412011911083, + "loss": 1.9395, + "step": 2204 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006084203578552528, + "loss": 2.0615, + "step": 2205 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006080994676664943, + "loss": 2.0254, + "step": 2206 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006077785307635026, + "loss": 2.04, + "step": 2207 + }, + { + "epoch": 0.45, + "learning_rate": 0.000607457547284968, + "loss": 2.0674, + "step": 2208 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006071365173696003, + "loss": 2.0215, + "step": 2209 + }, + { + "epoch": 0.45, + "learning_rate": 0.00060681544115613, + "loss": 2.043, + "step": 2210 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006064943187833074, + "loss": 2.0605, + "step": 2211 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006061731503899024, + "loss": 1.9619, + "step": 2212 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006058519361147054, + "loss": 2.0576, + "step": 2213 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006055306760965263, + "loss": 2.0137, + "step": 2214 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006052093704741945, + "loss": 2.0449, + "step": 2215 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006048880193865598, + "loss": 1.9971, + "step": 2216 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006045666229724908, + "loss": 1.9434, + "step": 2217 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006042451813708766, + "loss": 2.1191, + "step": 2218 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006039236947206252, + "loss": 1.9434, + "step": 2219 + }, + { + "epoch": 0.45, + "learning_rate": 0.000603602163160664, + "loss": 2.0059, + "step": 2220 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006032805868299402, + "loss": 1.9248, + "step": 2221 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006029589658674202, + "loss": 1.9951, + "step": 2222 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006026373004120896, + "loss": 2.1191, + "step": 2223 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006023155906029534, + "loss": 2.0615, + "step": 2224 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006019938365790356, + "loss": 2.0498, + "step": 2225 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006016720384793793, + "loss": 1.9854, + "step": 2226 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006013501964430468, + "loss": 2.0664, + "step": 2227 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006010283106091194, + "loss": 1.9609, + "step": 2228 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006007063811166969, + "loss": 2.0684, + "step": 2229 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006003844081048985, + "loss": 1.9336, + "step": 2230 + }, + { + "epoch": 0.45, + "learning_rate": 0.0006000623917128622, + "loss": 2.082, + "step": 2231 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005997403320797443, + "loss": 2.0615, + "step": 2232 + }, + { + "epoch": 0.45, + "learning_rate": 0.00059941822934472, + "loss": 1.9434, + "step": 2233 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005990960836469832, + "loss": 2.0029, + "step": 2234 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005987738951257467, + "loss": 2.0674, + "step": 2235 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005984516639202408, + "loss": 2.0361, + "step": 2236 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005981293901697152, + "loss": 1.9824, + "step": 2237 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005978070740134379, + "loss": 1.9854, + "step": 2238 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005974847155906944, + "loss": 2.0674, + "step": 2239 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005971623150407896, + "loss": 2.0186, + "step": 2240 + }, + { + "epoch": 0.45, + "learning_rate": 0.0005968398725030459, + "loss": 2.0557, + "step": 2241 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005965173881168037, + "loss": 2.0488, + "step": 2242 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005961948620214223, + "loss": 1.9951, + "step": 2243 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005958722943562782, + "loss": 1.9658, + "step": 2244 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005955496852607661, + "loss": 2.0596, + "step": 2245 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005952270348742991, + "loss": 2.0645, + "step": 2246 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005949043433363072, + "loss": 2.0205, + "step": 2247 + }, + { + "epoch": 0.46, + "learning_rate": 0.000594581610786239, + "loss": 1.9775, + "step": 2248 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005942588373635606, + "loss": 2.0215, + "step": 2249 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005939360232077554, + "loss": 2.1055, + "step": 2250 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005936131684583249, + "loss": 1.9531, + "step": 2251 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005932902732547879, + "loss": 1.998, + "step": 2252 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005929673377366808, + "loss": 2.1279, + "step": 2253 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005926443620435572, + "loss": 2.0664, + "step": 2254 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005923213463149882, + "loss": 2.0117, + "step": 2255 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005919982906905625, + "loss": 2.0645, + "step": 2256 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005916751953098853, + "loss": 2.0361, + "step": 2257 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005913520603125794, + "loss": 1.959, + "step": 2258 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005910288858382855, + "loss": 2.0166, + "step": 2259 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005907056720266598, + "loss": 2.0625, + "step": 2260 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005903824190173765, + "loss": 2.0166, + "step": 2261 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005900591269501268, + "loss": 1.9932, + "step": 2262 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005897357959646183, + "loss": 2.0762, + "step": 2263 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005894124262005758, + "loss": 2.1182, + "step": 2264 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005890890177977403, + "loss": 2.043, + "step": 2265 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005887655708958704, + "loss": 1.9424, + "step": 2266 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005884420856347405, + "loss": 1.9473, + "step": 2267 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005881185621541419, + "loss": 2.0215, + "step": 2268 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005877950005938827, + "loss": 2.0918, + "step": 2269 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005874714010937871, + "loss": 2.0146, + "step": 2270 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005871477637936955, + "loss": 2.0195, + "step": 2271 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005868240888334653, + "loss": 1.9873, + "step": 2272 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005865003763529694, + "loss": 2.083, + "step": 2273 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005861766264920975, + "loss": 1.9863, + "step": 2274 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005858528393907552, + "loss": 2.0059, + "step": 2275 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005855290151888644, + "loss": 2.0312, + "step": 2276 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005852051540263627, + "loss": 2.0469, + "step": 2277 + }, + { + "epoch": 0.46, + "learning_rate": 0.000584881256043204, + "loss": 1.9844, + "step": 2278 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005845573213793577, + "loss": 2.1035, + "step": 2279 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005842333501748096, + "loss": 2.0625, + "step": 2280 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005839093425695609, + "loss": 1.9668, + "step": 2281 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005835852987036285, + "loss": 2.1484, + "step": 2282 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005832612187170453, + "loss": 2.0791, + "step": 2283 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005829371027498596, + "loss": 1.9785, + "step": 2284 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005826129509421351, + "loss": 1.9893, + "step": 2285 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005822887634339512, + "loss": 2.0889, + "step": 2286 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005819645403654027, + "loss": 1.9629, + "step": 2287 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005816402818766, + "loss": 2.0312, + "step": 2288 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005813159881076681, + "loss": 1.9268, + "step": 2289 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005809916591987479, + "loss": 2.0195, + "step": 2290 + }, + { + "epoch": 0.46, + "learning_rate": 0.0005806672952899954, + "loss": 1.9971, + "step": 2291 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005803428965215815, + "loss": 2.0342, + "step": 2292 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005800184630336922, + "loss": 1.9941, + "step": 2293 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005796939949665287, + "loss": 2.1299, + "step": 2294 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005793694924603071, + "loss": 2.0332, + "step": 2295 + }, + { + "epoch": 0.47, + "learning_rate": 0.000579044955655258, + "loss": 1.9902, + "step": 2296 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005787203846916272, + "loss": 2.0869, + "step": 2297 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005783957797096755, + "loss": 2.0029, + "step": 2298 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005780711408496777, + "loss": 1.9688, + "step": 2299 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005777464682519239, + "loss": 1.9463, + "step": 2300 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005774217620567183, + "loss": 2.0088, + "step": 2301 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005770970224043802, + "loss": 2.0107, + "step": 2302 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005767722494352422, + "loss": 2.0342, + "step": 2303 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005764474432896528, + "loss": 1.915, + "step": 2304 + }, + { + "epoch": 0.47, + "learning_rate": 0.000576122604107974, + "loss": 2.0039, + "step": 2305 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005757977320305821, + "loss": 2.043, + "step": 2306 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005754728271978675, + "loss": 2.0918, + "step": 2307 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005751478897502352, + "loss": 1.9834, + "step": 2308 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005748229198281041, + "loss": 1.9404, + "step": 2309 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005744979175719069, + "loss": 1.9775, + "step": 2310 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005741728831220907, + "loss": 2.0029, + "step": 2311 + }, + { + "epoch": 0.47, + "learning_rate": 0.000573847816619116, + "loss": 1.9746, + "step": 2312 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005735227182034577, + "loss": 2.0576, + "step": 2313 + }, + { + "epoch": 0.47, + "learning_rate": 0.000573197588015604, + "loss": 1.999, + "step": 2314 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005728724261960572, + "loss": 2.0215, + "step": 2315 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005725472328853329, + "loss": 2.0391, + "step": 2316 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005722220082239607, + "loss": 1.9932, + "step": 2317 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005718967523524837, + "loss": 2.0234, + "step": 2318 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005715714654114581, + "loss": 2.0068, + "step": 2319 + }, + { + "epoch": 0.47, + "learning_rate": 0.000571246147541454, + "loss": 1.9463, + "step": 2320 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005709207988830545, + "loss": 2.0498, + "step": 2321 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005705954195768561, + "loss": 2.0332, + "step": 2322 + }, + { + "epoch": 0.47, + "learning_rate": 0.000570270009763469, + "loss": 2.0674, + "step": 2323 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005699445695835155, + "loss": 2.0869, + "step": 2324 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005696190991776323, + "loss": 2.0635, + "step": 2325 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005692935986864685, + "loss": 2.0732, + "step": 2326 + }, + { + "epoch": 0.47, + "learning_rate": 0.000568968068250686, + "loss": 1.9609, + "step": 2327 + }, + { + "epoch": 0.47, + "learning_rate": 0.00056864250801096, + "loss": 1.9893, + "step": 2328 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005683169181079787, + "loss": 2.0312, + "step": 2329 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005679912986824427, + "loss": 2.0273, + "step": 2330 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005676656498750656, + "loss": 1.9863, + "step": 2331 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005673399718265737, + "loss": 2.0166, + "step": 2332 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005670142646777059, + "loss": 1.9766, + "step": 2333 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005666885285692137, + "loss": 2.0186, + "step": 2334 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005663627636418611, + "loss": 1.9658, + "step": 2335 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005660369700364242, + "loss": 1.9658, + "step": 2336 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005657111478936926, + "loss": 2.0078, + "step": 2337 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005653852973544666, + "loss": 2.0986, + "step": 2338 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005650594185595604, + "loss": 2.0, + "step": 2339 + }, + { + "epoch": 0.47, + "learning_rate": 0.0005647335116497992, + "loss": 2.0469, + "step": 2340 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005644075767660209, + "loss": 2.1289, + "step": 2341 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005640816140490753, + "loss": 1.9541, + "step": 2342 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005637556236398246, + "loss": 2.0449, + "step": 2343 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005634296056791423, + "loss": 2.0518, + "step": 2344 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005631035603079146, + "loss": 1.9492, + "step": 2345 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005627774876670388, + "loss": 2.0254, + "step": 2346 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005624513878974244, + "loss": 2.0908, + "step": 2347 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005621252611399927, + "loss": 1.9893, + "step": 2348 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005617991075356763, + "loss": 2.0322, + "step": 2349 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005614729272254199, + "loss": 2.0557, + "step": 2350 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005611467203501791, + "loss": 2.0469, + "step": 2351 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005608204870509214, + "loss": 1.998, + "step": 2352 + }, + { + "epoch": 0.48, + "learning_rate": 0.000560494227468626, + "loss": 2.125, + "step": 2353 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005601679417442827, + "loss": 2.1152, + "step": 2354 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005598416300188931, + "loss": 2.0352, + "step": 2355 + }, + { + "epoch": 0.48, + "learning_rate": 0.00055951529243347, + "loss": 2.0117, + "step": 2356 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005591889291290373, + "loss": 2.0566, + "step": 2357 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005588625402466302, + "loss": 1.9902, + "step": 2358 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005585361259272943, + "loss": 2.0801, + "step": 2359 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005582096863120872, + "loss": 2.0049, + "step": 2360 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005578832215420767, + "loss": 1.9766, + "step": 2361 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005575567317583414, + "loss": 1.9658, + "step": 2362 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005572302171019715, + "loss": 1.9775, + "step": 2363 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005569036777140672, + "loss": 2.1045, + "step": 2364 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005565771137357395, + "loss": 2.002, + "step": 2365 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005562505253081103, + "loss": 2.0957, + "step": 2366 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005559239125723119, + "loss": 2.0088, + "step": 2367 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005555972756694869, + "loss": 2.0107, + "step": 2368 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005552706147407888, + "loss": 2.0605, + "step": 2369 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005549439299273814, + "loss": 2.0781, + "step": 2370 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005546172213704383, + "loss": 1.9805, + "step": 2371 + }, + { + "epoch": 0.48, + "learning_rate": 0.000554290489211144, + "loss": 1.9834, + "step": 2372 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005539637335906926, + "loss": 1.9521, + "step": 2373 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005536369546502891, + "loss": 2.1377, + "step": 2374 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005533101525311477, + "loss": 1.9531, + "step": 2375 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005529833273744933, + "loss": 1.9678, + "step": 2376 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005526564793215606, + "loss": 2.0039, + "step": 2377 + }, + { + "epoch": 0.48, + "learning_rate": 0.000552329608513594, + "loss": 2.0166, + "step": 2378 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005520027150918477, + "loss": 2.0547, + "step": 2379 + }, + { + "epoch": 0.48, + "learning_rate": 0.000551675799197586, + "loss": 2.0742, + "step": 2380 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005513488609720827, + "loss": 2.0811, + "step": 2381 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005510219005566214, + "loss": 2.0918, + "step": 2382 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005506949180924947, + "loss": 1.9629, + "step": 2383 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005503679137210057, + "loss": 2.0547, + "step": 2384 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005500408875834664, + "loss": 2.043, + "step": 2385 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005497138398211979, + "loss": 1.9678, + "step": 2386 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005493867705755312, + "loss": 2.0615, + "step": 2387 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005490596799878067, + "loss": 2.1045, + "step": 2388 + }, + { + "epoch": 0.48, + "learning_rate": 0.0005487325681993732, + "loss": 1.9668, + "step": 2389 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005484054353515896, + "loss": 2.0293, + "step": 2390 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005480782815858233, + "loss": 2.0381, + "step": 2391 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005477511070434509, + "loss": 1.9727, + "step": 2392 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005474239118658579, + "loss": 2.0225, + "step": 2393 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005470966961944392, + "loss": 2.0537, + "step": 2394 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005467694601705977, + "loss": 2.0557, + "step": 2395 + }, + { + "epoch": 0.49, + "learning_rate": 0.000546442203935746, + "loss": 2.043, + "step": 2396 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005461149276313046, + "loss": 2.0156, + "step": 2397 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005457876313987033, + "loss": 1.9912, + "step": 2398 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005454603153793804, + "loss": 1.9873, + "step": 2399 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005451329797147822, + "loss": 2.0547, + "step": 2400 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005448056245463644, + "loss": 2.0107, + "step": 2401 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005444782500155904, + "loss": 2.0225, + "step": 2402 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005441508562639322, + "loss": 1.9775, + "step": 2403 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005438234434328704, + "loss": 1.9922, + "step": 2404 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005434960116638932, + "loss": 2.0664, + "step": 2405 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005431685610984976, + "loss": 1.9971, + "step": 2406 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005428410918781884, + "loss": 1.9717, + "step": 2407 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005425136041444786, + "loss": 1.9717, + "step": 2408 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005421860980388892, + "loss": 2.0234, + "step": 2409 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005418585737029489, + "loss": 1.9727, + "step": 2410 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005415310312781943, + "loss": 1.9863, + "step": 2411 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005412034709061705, + "loss": 1.9912, + "step": 2412 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005408758927284294, + "loss": 1.9316, + "step": 2413 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005405482968865312, + "loss": 2.0098, + "step": 2414 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005402206835220435, + "loss": 2.0059, + "step": 2415 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005398930527765415, + "loss": 1.9893, + "step": 2416 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005395654047916082, + "loss": 1.9473, + "step": 2417 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005392377397088334, + "loss": 2.041, + "step": 2418 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005389100576698149, + "loss": 1.9648, + "step": 2419 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005385823588161577, + "loss": 1.9619, + "step": 2420 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005382546432894737, + "loss": 2.1045, + "step": 2421 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005379269112313823, + "loss": 2.0215, + "step": 2422 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005375991627835103, + "loss": 1.9697, + "step": 2423 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005372713980874913, + "loss": 2.0342, + "step": 2424 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005369436172849657, + "loss": 1.9727, + "step": 2425 + }, + { + "epoch": 0.49, + "learning_rate": 0.000536615820517581, + "loss": 2.0576, + "step": 2426 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005362880079269921, + "loss": 1.9473, + "step": 2427 + }, + { + "epoch": 0.49, + "learning_rate": 0.00053596017965486, + "loss": 2.041, + "step": 2428 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005356323358428528, + "loss": 1.9424, + "step": 2429 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005353044766326455, + "loss": 2.0342, + "step": 2430 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005349766021659195, + "loss": 1.9502, + "step": 2431 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005346487125843627, + "loss": 1.9775, + "step": 2432 + }, + { + "epoch": 0.49, + "learning_rate": 0.00053432080802967, + "loss": 1.9199, + "step": 2433 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005339928886435423, + "loss": 2.0977, + "step": 2434 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005336649545676869, + "loss": 2.082, + "step": 2435 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005333370059438179, + "loss": 1.9824, + "step": 2436 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005330090429136552, + "loss": 2.0381, + "step": 2437 + }, + { + "epoch": 0.49, + "learning_rate": 0.0005326810656189254, + "loss": 2.0186, + "step": 2438 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005323530742013608, + "loss": 1.9434, + "step": 2439 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005320250688027, + "loss": 2.0215, + "step": 2440 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005316970495646878, + "loss": 2.0859, + "step": 2441 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005313690166290746, + "loss": 1.9941, + "step": 2442 + }, + { + "epoch": 0.5, + "learning_rate": 0.000531040970137617, + "loss": 2.0029, + "step": 2443 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005307129102320777, + "loss": 2.0098, + "step": 2444 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005303848370542246, + "loss": 2.0508, + "step": 2445 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005300567507458315, + "loss": 2.041, + "step": 2446 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005297286514486785, + "loss": 1.9814, + "step": 2447 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005294005393045502, + "loss": 2.0312, + "step": 2448 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005290724144552379, + "loss": 1.9873, + "step": 2449 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005287442770425377, + "loss": 2.0361, + "step": 2450 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005284161272082513, + "loss": 1.8965, + "step": 2451 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005280879650941857, + "loss": 2.0898, + "step": 2452 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005277597908421531, + "loss": 1.9492, + "step": 2453 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005274316045939714, + "loss": 1.9316, + "step": 2454 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005271034064914634, + "loss": 2.0137, + "step": 2455 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005267751966764569, + "loss": 1.9707, + "step": 2456 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005264469752907848, + "loss": 2.0205, + "step": 2457 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005261187424762854, + "loss": 1.9834, + "step": 2458 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005257904983748013, + "loss": 1.999, + "step": 2459 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005254622431281804, + "loss": 2.0439, + "step": 2460 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005251339768782753, + "loss": 1.916, + "step": 2461 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005248056997669434, + "loss": 1.9844, + "step": 2462 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005244774119360468, + "loss": 1.918, + "step": 2463 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005241491135274521, + "loss": 2.0, + "step": 2464 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005238208046830307, + "loss": 2.0088, + "step": 2465 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005234924855446584, + "loss": 2.043, + "step": 2466 + }, + { + "epoch": 0.5, + "learning_rate": 0.000523164156254215, + "loss": 1.9482, + "step": 2467 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005228358169535856, + "loss": 2.002, + "step": 2468 + }, + { + "epoch": 0.5, + "learning_rate": 0.000522507467784659, + "loss": 2.0674, + "step": 2469 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005221791088893282, + "loss": 1.9971, + "step": 2470 + }, + { + "epoch": 0.5, + "learning_rate": 0.000521850740409491, + "loss": 2.0059, + "step": 2471 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005215223624870487, + "loss": 2.0332, + "step": 2472 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005211939752639067, + "loss": 2.0264, + "step": 2473 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005208655788819751, + "loss": 1.9824, + "step": 2474 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005205371734831675, + "loss": 2.0234, + "step": 2475 + }, + { + "epoch": 0.5, + "learning_rate": 0.000520208759209401, + "loss": 1.9795, + "step": 2476 + }, + { + "epoch": 0.5, + "learning_rate": 0.000519880336202597, + "loss": 1.9854, + "step": 2477 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005195519046046808, + "loss": 2.0635, + "step": 2478 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005192234645575814, + "loss": 2.0215, + "step": 2479 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005188950162032309, + "loss": 1.9307, + "step": 2480 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005185665596835655, + "loss": 2.0176, + "step": 2481 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005182380951405249, + "loss": 2.0312, + "step": 2482 + }, + { + "epoch": 0.5, + "learning_rate": 0.000517909622716052, + "loss": 2.0381, + "step": 2483 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005175811425520933, + "loss": 2.0781, + "step": 2484 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005172526547905987, + "loss": 1.9307, + "step": 2485 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005169241595735213, + "loss": 1.9912, + "step": 2486 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005165956570428172, + "loss": 1.9766, + "step": 2487 + }, + { + "epoch": 0.5, + "learning_rate": 0.0005162671473404461, + "loss": 2.0254, + "step": 2488 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005159386306083705, + "loss": 1.9795, + "step": 2489 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005156101069885563, + "loss": 1.9834, + "step": 2490 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005152815766229714, + "loss": 2.1152, + "step": 2491 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005149530396535879, + "loss": 1.9971, + "step": 2492 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005146244962223799, + "loss": 2.0586, + "step": 2493 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005142959464713247, + "loss": 2.0264, + "step": 2494 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005139673905424021, + "loss": 2.0186, + "step": 2495 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005136388285775946, + "loss": 1.9893, + "step": 2496 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005133102607188875, + "loss": 2.1211, + "step": 2497 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005129816871082683, + "loss": 1.9775, + "step": 2498 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005126531078877274, + "loss": 1.9717, + "step": 2499 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005123245231992573, + "loss": 1.9199, + "step": 2500 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005119959331848531, + "loss": 1.9912, + "step": 2501 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005116673379865119, + "loss": 1.9531, + "step": 2502 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005113387377462334, + "loss": 1.9492, + "step": 2503 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005110101326060192, + "loss": 2.1455, + "step": 2504 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005106815227078729, + "loss": 2.1533, + "step": 2505 + }, + { + "epoch": 0.51, + "learning_rate": 0.000510352908193801, + "loss": 1.9785, + "step": 2506 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005100242892058108, + "loss": 1.9795, + "step": 2507 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005096956658859122, + "loss": 2.0674, + "step": 2508 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005093670383761171, + "loss": 1.9873, + "step": 2509 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005090384068184387, + "loss": 2.0791, + "step": 2510 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005087097713548926, + "loss": 2.0088, + "step": 2511 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005083811321274953, + "loss": 2.0479, + "step": 2512 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005080524892782655, + "loss": 2.1641, + "step": 2513 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005077238429492236, + "loss": 1.8809, + "step": 2514 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005073951932823907, + "loss": 2.0283, + "step": 2515 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005070665404197903, + "loss": 1.9551, + "step": 2516 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005067378845034467, + "loss": 2.0352, + "step": 2517 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005064092256753856, + "loss": 2.084, + "step": 2518 + }, + { + "epoch": 0.51, + "learning_rate": 0.000506080564077634, + "loss": 1.9961, + "step": 2519 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005057518998522202, + "loss": 1.9199, + "step": 2520 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005054232331411735, + "loss": 2.0205, + "step": 2521 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005050945640865244, + "loss": 1.8857, + "step": 2522 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005047658928303043, + "loss": 1.9902, + "step": 2523 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005044372195145455, + "loss": 2.0029, + "step": 2524 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005041085442812815, + "loss": 2.0615, + "step": 2525 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005037798672725461, + "loss": 1.9492, + "step": 2526 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005034511886303746, + "loss": 1.9941, + "step": 2527 + }, + { + "epoch": 0.51, + "learning_rate": 0.000503122508496802, + "loss": 2.0498, + "step": 2528 + }, + { + "epoch": 0.51, + "learning_rate": 0.000502793827013865, + "loss": 1.9785, + "step": 2529 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005024651443236003, + "loss": 2.0537, + "step": 2530 + }, + { + "epoch": 0.51, + "learning_rate": 0.000502136460568045, + "loss": 1.9639, + "step": 2531 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005018077758892372, + "loss": 2.0283, + "step": 2532 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005014790904292149, + "loss": 2.0635, + "step": 2533 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005011504043300167, + "loss": 1.9355, + "step": 2534 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005008217177336817, + "loss": 1.9795, + "step": 2535 + }, + { + "epoch": 0.51, + "learning_rate": 0.0005004930307822481, + "loss": 2.1133, + "step": 2536 + }, + { + "epoch": 0.51, + "learning_rate": 0.000500164343617756, + "loss": 2.043, + "step": 2537 + }, + { + "epoch": 0.52, + "learning_rate": 0.000499835656382244, + "loss": 1.9775, + "step": 2538 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004995069692177519, + "loss": 1.9316, + "step": 2539 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004991782822663186, + "loss": 2.0068, + "step": 2540 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004988495956699832, + "loss": 1.9658, + "step": 2541 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004985209095707851, + "loss": 1.9951, + "step": 2542 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004981922241107629, + "loss": 1.8896, + "step": 2543 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004978635394319549, + "loss": 2.0547, + "step": 2544 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004975348556763999, + "loss": 1.9326, + "step": 2545 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004972061729861351, + "loss": 2.0342, + "step": 2546 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004968774915031981, + "loss": 1.9502, + "step": 2547 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004965488113696257, + "loss": 2.0498, + "step": 2548 + }, + { + "epoch": 0.52, + "learning_rate": 0.000496220132727454, + "loss": 1.9971, + "step": 2549 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004958914557187187, + "loss": 2.0625, + "step": 2550 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004955627804854544, + "loss": 1.9648, + "step": 2551 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004952341071696959, + "loss": 2.0283, + "step": 2552 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004949054359134757, + "loss": 2.0557, + "step": 2553 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004945767668588264, + "loss": 1.9893, + "step": 2554 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004942481001477799, + "loss": 2.0244, + "step": 2555 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004939194359223662, + "loss": 1.9932, + "step": 2556 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004935907743246145, + "loss": 1.9189, + "step": 2557 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004932621154965534, + "loss": 1.9561, + "step": 2558 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004929334595802098, + "loss": 2.0254, + "step": 2559 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004926048067176093, + "loss": 1.8916, + "step": 2560 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004922761570507765, + "loss": 2.002, + "step": 2561 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004919475107217346, + "loss": 2.002, + "step": 2562 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004916188678725049, + "loss": 2.0635, + "step": 2563 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004912902286451074, + "loss": 2.0469, + "step": 2564 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004909615931815613, + "loss": 2.0732, + "step": 2565 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004906329616238831, + "loss": 1.9092, + "step": 2566 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004903043341140879, + "loss": 1.9873, + "step": 2567 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004899757107941894, + "loss": 2.0225, + "step": 2568 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004896470918061992, + "loss": 2.0361, + "step": 2569 + }, + { + "epoch": 0.52, + "learning_rate": 0.000489318477292127, + "loss": 2.0059, + "step": 2570 + }, + { + "epoch": 0.52, + "learning_rate": 0.000488989867393981, + "loss": 2.0479, + "step": 2571 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004886612622537668, + "loss": 1.9541, + "step": 2572 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004883326620134881, + "loss": 2.0127, + "step": 2573 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004880040668151471, + "loss": 1.9717, + "step": 2574 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004876754768007428, + "loss": 2.0371, + "step": 2575 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004873468921122726, + "loss": 2.0195, + "step": 2576 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004870183128917318, + "loss": 2.082, + "step": 2577 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004866897392811127, + "loss": 1.9688, + "step": 2578 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004863611714224054, + "loss": 1.9697, + "step": 2579 + }, + { + "epoch": 0.52, + "learning_rate": 0.000486032609457598, + "loss": 2.1094, + "step": 2580 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048570405352867536, + "loss": 2.0508, + "step": 2581 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004853755037776202, + "loss": 2.0381, + "step": 2582 + }, + { + "epoch": 0.52, + "learning_rate": 0.00048504696034641214, + "loss": 1.9492, + "step": 2583 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004847184233770288, + "loss": 1.9492, + "step": 2584 + }, + { + "epoch": 0.52, + "learning_rate": 0.000484389893011444, + "loss": 1.9639, + "step": 2585 + }, + { + "epoch": 0.52, + "learning_rate": 0.0004840613693916294, + "loss": 1.9004, + "step": 2586 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004837328526595539, + "loss": 2.001, + "step": 2587 + }, + { + "epoch": 0.53, + "learning_rate": 0.00048340434295718283, + "loss": 2.0293, + "step": 2588 + }, + { + "epoch": 0.53, + "learning_rate": 0.00048307584042647875, + "loss": 1.9902, + "step": 2589 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004827473452094013, + "loss": 1.8955, + "step": 2590 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004824188574479067, + "loss": 1.9873, + "step": 2591 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004820903772839481, + "loss": 2.0176, + "step": 2592 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004817619048594752, + "loss": 1.9395, + "step": 2593 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004814334403164346, + "loss": 1.9844, + "step": 2594 + }, + { + "epoch": 0.53, + "learning_rate": 0.00048110498379676914, + "loss": 2.085, + "step": 2595 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004807765354424186, + "loss": 2.0156, + "step": 2596 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004804480953953192, + "loss": 2.0742, + "step": 2597 + }, + { + "epoch": 0.53, + "learning_rate": 0.00048011966379740305, + "loss": 1.9951, + "step": 2598 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047979124079059913, + "loss": 1.9736, + "step": 2599 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047946282651683273, + "loss": 2.1328, + "step": 2600 + }, + { + "epoch": 0.53, + "learning_rate": 0.000479134421118025, + "loss": 2.0225, + "step": 2601 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047880602473609324, + "loss": 1.9219, + "step": 2602 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047847763751295144, + "loss": 1.9033, + "step": 2603 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047814925959050917, + "loss": 2.1104, + "step": 2604 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047782089111067175, + "loss": 1.9268, + "step": 2605 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047749253221534107, + "loss": 1.9814, + "step": 2606 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004771641830464146, + "loss": 2.0107, + "step": 2607 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047683584374578504, + "loss": 2.0156, + "step": 2608 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047650751445534175, + "loss": 2.04, + "step": 2609 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004761791953169694, + "loss": 2.0244, + "step": 2610 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004758508864725478, + "loss": 1.9893, + "step": 2611 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047552258806395325, + "loss": 2.0596, + "step": 2612 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047519430023305664, + "loss": 1.9248, + "step": 2613 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004748660231217248, + "loss": 2.1367, + "step": 2614 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047453775687181964, + "loss": 1.9619, + "step": 2615 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047420950162519875, + "loss": 1.9668, + "step": 2616 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047388125752371475, + "loss": 2.0186, + "step": 2617 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004735530247092151, + "loss": 2.0107, + "step": 2618 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004732248033235432, + "loss": 1.9824, + "step": 2619 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004728965935085367, + "loss": 1.9277, + "step": 2620 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047256839540602854, + "loss": 2.0283, + "step": 2621 + }, + { + "epoch": 0.53, + "learning_rate": 0.000472240209157847, + "loss": 1.9707, + "step": 2622 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004719120349058145, + "loss": 2.041, + "step": 2623 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047158387279174873, + "loss": 2.042, + "step": 2624 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004712557229574624, + "loss": 1.9814, + "step": 2625 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004709275855447621, + "loss": 1.9805, + "step": 2626 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047059946069544966, + "loss": 1.9805, + "step": 2627 + }, + { + "epoch": 0.53, + "learning_rate": 0.00047027134855132167, + "loss": 2.0391, + "step": 2628 + }, + { + "epoch": 0.53, + "learning_rate": 0.00046994324925416864, + "loss": 2.0332, + "step": 2629 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004696151629457756, + "loss": 1.998, + "step": 2630 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004692870897679224, + "loss": 1.9893, + "step": 2631 + }, + { + "epoch": 0.53, + "learning_rate": 0.00046895902986238304, + "loss": 2.0615, + "step": 2632 + }, + { + "epoch": 0.53, + "learning_rate": 0.00046863098337092553, + "loss": 2.0254, + "step": 2633 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004683029504353123, + "loss": 2.1318, + "step": 2634 + }, + { + "epoch": 0.53, + "learning_rate": 0.0004679749311973001, + "loss": 1.9883, + "step": 2635 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004676469257986394, + "loss": 2.0078, + "step": 2636 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004673189343810747, + "loss": 1.9746, + "step": 2637 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046699095708634483, + "loss": 2.0498, + "step": 2638 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004666629940561823, + "loss": 1.8848, + "step": 2639 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004663350454323132, + "loss": 2.0146, + "step": 2640 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004660071113564579, + "loss": 2.0254, + "step": 2641 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004656791919703302, + "loss": 1.9619, + "step": 2642 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046535128741563727, + "loss": 1.9424, + "step": 2643 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004650233978340805, + "loss": 1.9629, + "step": 2644 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004646955233673546, + "loss": 1.9902, + "step": 2645 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004643676641571472, + "loss": 1.9961, + "step": 2646 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046403982034514015, + "loss": 1.9668, + "step": 2647 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046371199207300795, + "loss": 2.0918, + "step": 2648 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046338417948241906, + "loss": 1.9385, + "step": 2649 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004630563827150344, + "loss": 2.001, + "step": 2650 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046272860191250875, + "loss": 1.9688, + "step": 2651 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046240083721648973, + "loss": 1.9355, + "step": 2652 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046207308876861764, + "loss": 1.917, + "step": 2653 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004617453567105264, + "loss": 2.0459, + "step": 2654 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046141764118384256, + "loss": 2.0615, + "step": 2655 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004610899423301851, + "loss": 2.0566, + "step": 2656 + }, + { + "epoch": 0.54, + "learning_rate": 0.00046076226029116674, + "loss": 2.0381, + "step": 2657 + }, + { + "epoch": 0.54, + "learning_rate": 0.000460434595208392, + "loss": 1.9893, + "step": 2658 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004601069472234584, + "loss": 1.9834, + "step": 2659 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004597793164779566, + "loss": 2.0273, + "step": 2660 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004594517031134689, + "loss": 1.9824, + "step": 2661 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004591241072715706, + "loss": 1.9775, + "step": 2662 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004587965290938296, + "loss": 1.998, + "step": 2663 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045846896872180575, + "loss": 1.9541, + "step": 2664 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045814142629705133, + "loss": 2.041, + "step": 2665 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004578139019611109, + "loss": 1.9551, + "step": 2666 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045748639585552143, + "loss": 2.0791, + "step": 2667 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004571589081218116, + "loss": 2.0918, + "step": 2668 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045683143890150237, + "loss": 1.8779, + "step": 2669 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045650398833610683, + "loss": 1.8643, + "step": 2670 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004561765565671297, + "loss": 1.9961, + "step": 2671 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004558491437360677, + "loss": 1.9209, + "step": 2672 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004555217499844097, + "loss": 2.0791, + "step": 2673 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004551943754536358, + "loss": 1.999, + "step": 2674 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004548670202852178, + "loss": 2.0361, + "step": 2675 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004545396846206198, + "loss": 2.0166, + "step": 2676 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045421236860129685, + "loss": 1.9873, + "step": 2677 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045388507236869546, + "loss": 1.9922, + "step": 2678 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045355779606425406, + "loss": 1.9834, + "step": 2679 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045323053982940237, + "loss": 2.002, + "step": 2680 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004529033038055609, + "loss": 1.7871, + "step": 2681 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004525760881341421, + "loss": 1.9414, + "step": 2682 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045224889295654924, + "loss": 1.9717, + "step": 2683 + }, + { + "epoch": 0.54, + "learning_rate": 0.00045192171841417693, + "loss": 1.9561, + "step": 2684 + }, + { + "epoch": 0.54, + "learning_rate": 0.0004515945646484105, + "loss": 1.9551, + "step": 2685 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004512674318006268, + "loss": 1.9346, + "step": 2686 + }, + { + "epoch": 0.55, + "learning_rate": 0.00045094032001219355, + "loss": 1.8955, + "step": 2687 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004506132294244687, + "loss": 1.9209, + "step": 2688 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004502861601788022, + "loss": 2.0293, + "step": 2689 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044995911241653387, + "loss": 1.9404, + "step": 2690 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044963208627899425, + "loss": 1.9336, + "step": 2691 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044930508190750535, + "loss": 1.9111, + "step": 2692 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004489780994433788, + "loss": 2.0479, + "step": 2693 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004486511390279172, + "loss": 1.9287, + "step": 2694 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044832420080241403, + "loss": 1.916, + "step": 2695 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044799728490815235, + "loss": 1.9854, + "step": 2696 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044767039148640596, + "loss": 2.0479, + "step": 2697 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044734352067843944, + "loss": 1.96, + "step": 2698 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004470166726255068, + "loss": 1.9463, + "step": 2699 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004466898474688524, + "loss": 2.04, + "step": 2700 + }, + { + "epoch": 0.55, + "learning_rate": 0.000446363045349711, + "loss": 1.9561, + "step": 2701 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004460362664093075, + "loss": 2.001, + "step": 2702 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044570951078885617, + "loss": 1.9629, + "step": 2703 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004453827786295617, + "loss": 1.9336, + "step": 2704 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044505607007261865, + "loss": 2.0088, + "step": 2705 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044472938525921113, + "loss": 2.0068, + "step": 2706 + }, + { + "epoch": 0.55, + "learning_rate": 0.000444402724330513, + "loss": 2.0498, + "step": 2707 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044407608742768825, + "loss": 2.0195, + "step": 2708 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044374947469188986, + "loss": 2.0, + "step": 2709 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004434228862642605, + "loss": 2.0469, + "step": 2710 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004430963222859329, + "loss": 2.0664, + "step": 2711 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004427697828980286, + "loss": 1.9941, + "step": 2712 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044244326824165853, + "loss": 1.9844, + "step": 2713 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044211677845792344, + "loss": 2.0781, + "step": 2714 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044179031368791295, + "loss": 1.9814, + "step": 2715 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004414638740727058, + "loss": 1.9609, + "step": 2716 + }, + { + "epoch": 0.55, + "learning_rate": 0.00044113745975336995, + "loss": 2.0049, + "step": 2717 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004408110708709628, + "loss": 1.9463, + "step": 2718 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004404847075665302, + "loss": 2.0098, + "step": 2719 + }, + { + "epoch": 0.55, + "learning_rate": 0.000440158369981107, + "loss": 2.0029, + "step": 2720 + }, + { + "epoch": 0.55, + "learning_rate": 0.00043983205825571744, + "loss": 1.9805, + "step": 2721 + }, + { + "epoch": 0.55, + "learning_rate": 0.00043950577253137424, + "loss": 1.917, + "step": 2722 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004391795129490786, + "loss": 1.959, + "step": 2723 + }, + { + "epoch": 0.55, + "learning_rate": 0.00043885327964982105, + "loss": 1.9707, + "step": 2724 + }, + { + "epoch": 0.55, + "learning_rate": 0.00043852707277458033, + "loss": 2.0596, + "step": 2725 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004382008924643237, + "loss": 2.0439, + "step": 2726 + }, + { + "epoch": 0.55, + "learning_rate": 0.00043787473886000745, + "loss": 2.0439, + "step": 2727 + }, + { + "epoch": 0.55, + "learning_rate": 0.00043754861210257566, + "loss": 1.9531, + "step": 2728 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004372225123329613, + "loss": 1.9053, + "step": 2729 + }, + { + "epoch": 0.55, + "learning_rate": 0.00043689643969208557, + "loss": 1.8955, + "step": 2730 + }, + { + "epoch": 0.55, + "learning_rate": 0.00043657039432085776, + "loss": 2.0381, + "step": 2731 + }, + { + "epoch": 0.55, + "learning_rate": 0.00043624437636017547, + "loss": 2.0879, + "step": 2732 + }, + { + "epoch": 0.55, + "learning_rate": 0.0004359183859509247, + "loss": 1.9219, + "step": 2733 + }, + { + "epoch": 0.55, + "learning_rate": 0.00043559242323397917, + "loss": 2.0898, + "step": 2734 + }, + { + "epoch": 0.56, + "learning_rate": 0.000435266488350201, + "loss": 1.9912, + "step": 2735 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004349405814404396, + "loss": 1.9746, + "step": 2736 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004346147026455334, + "loss": 1.9229, + "step": 2737 + }, + { + "epoch": 0.56, + "learning_rate": 0.00043428885210630757, + "loss": 2.0049, + "step": 2738 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004339630299635756, + "loss": 1.8623, + "step": 2739 + }, + { + "epoch": 0.56, + "learning_rate": 0.00043363723635813907, + "loss": 2.0234, + "step": 2740 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004333114714307864, + "loss": 2.041, + "step": 2741 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004329857353222941, + "loss": 1.9121, + "step": 2742 + }, + { + "epoch": 0.56, + "learning_rate": 0.00043266002817342634, + "loss": 2.001, + "step": 2743 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004323343501249346, + "loss": 1.8916, + "step": 2744 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004320087013175574, + "loss": 1.9375, + "step": 2745 + }, + { + "epoch": 0.56, + "learning_rate": 0.00043168308189202136, + "loss": 2.0684, + "step": 2746 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004313574919890401, + "loss": 2.0176, + "step": 2747 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004310319317493141, + "loss": 1.9336, + "step": 2748 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004307064013135316, + "loss": 1.8398, + "step": 2749 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004303809008223678, + "loss": 1.999, + "step": 2750 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004300554304164846, + "loss": 2.0371, + "step": 2751 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004297299902365311, + "loss": 2.0215, + "step": 2752 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042940458042314394, + "loss": 1.9893, + "step": 2753 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042907920111694576, + "loss": 1.9775, + "step": 2754 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004287538524585461, + "loss": 1.9121, + "step": 2755 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004284285345885419, + "loss": 2.0527, + "step": 2756 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042810324764751647, + "loss": 1.8789, + "step": 2757 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004277779917760393, + "loss": 1.9512, + "step": 2758 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042745276711466713, + "loss": 1.9766, + "step": 2759 + }, + { + "epoch": 0.56, + "learning_rate": 0.000427127573803943, + "loss": 1.9678, + "step": 2760 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004268024119843961, + "loss": 1.9346, + "step": 2761 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042647728179654235, + "loss": 2.0225, + "step": 2762 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042615218338088405, + "loss": 2.0156, + "step": 2763 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004258271168779093, + "loss": 1.9775, + "step": 2764 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004255020824280931, + "loss": 1.8975, + "step": 2765 + }, + { + "epoch": 0.56, + "learning_rate": 0.000425177080171896, + "loss": 2.0322, + "step": 2766 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042485211024976497, + "loss": 2.0352, + "step": 2767 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042452717280213253, + "loss": 1.9814, + "step": 2768 + }, + { + "epoch": 0.56, + "learning_rate": 0.000424202267969418, + "loss": 2.0215, + "step": 2769 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004238773958920261, + "loss": 2.0156, + "step": 2770 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042355255671034715, + "loss": 1.9297, + "step": 2771 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004232277505647579, + "loss": 2.0635, + "step": 2772 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042290297759562013, + "loss": 1.9502, + "step": 2773 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004225782379432817, + "loss": 1.9131, + "step": 2774 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004222535317480762, + "loss": 1.9404, + "step": 2775 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042192885915032235, + "loss": 1.9258, + "step": 2776 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004216042202903245, + "loss": 1.8545, + "step": 2777 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042127961530837276, + "loss": 2.0303, + "step": 2778 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004209550443447421, + "loss": 2.0293, + "step": 2779 + }, + { + "epoch": 0.56, + "learning_rate": 0.00042063050753969296, + "loss": 1.9043, + "step": 2780 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004203060050334713, + "loss": 1.8926, + "step": 2781 + }, + { + "epoch": 0.56, + "learning_rate": 0.00041998153696630794, + "loss": 1.9824, + "step": 2782 + }, + { + "epoch": 0.56, + "learning_rate": 0.0004196571034784186, + "loss": 1.9121, + "step": 2783 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004193327047100046, + "loss": 1.9795, + "step": 2784 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041900834080125214, + "loss": 1.9521, + "step": 2785 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041868401189233207, + "loss": 2.0361, + "step": 2786 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041835971812340014, + "loss": 1.9424, + "step": 2787 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041803545963459734, + "loss": 1.8418, + "step": 2788 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041771123656604906, + "loss": 1.8945, + "step": 2789 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041738704905786505, + "loss": 1.8672, + "step": 2790 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041706289725014056, + "loss": 1.9248, + "step": 2791 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004167387812829549, + "loss": 1.957, + "step": 2792 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041641470129637155, + "loss": 1.9443, + "step": 2793 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041609065743043917, + "loss": 2.0312, + "step": 2794 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041576664982519054, + "loss": 1.9512, + "step": 2795 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004154426786206423, + "loss": 2.0059, + "step": 2796 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041511874395679603, + "loss": 2.0195, + "step": 2797 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041479484597363735, + "loss": 1.9697, + "step": 2798 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041447098481113553, + "loss": 1.9033, + "step": 2799 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041414716060924483, + "loss": 1.9404, + "step": 2800 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041382337350790257, + "loss": 1.9648, + "step": 2801 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041349962364703083, + "loss": 1.9795, + "step": 2802 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041317591116653486, + "loss": 2.0967, + "step": 2803 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004128522362063045, + "loss": 1.9551, + "step": 2804 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004125285989062131, + "loss": 2.0049, + "step": 2805 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041220499940611727, + "loss": 1.9043, + "step": 2806 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041188143784585816, + "loss": 2.0156, + "step": 2807 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041155791436525967, + "loss": 2.001, + "step": 2808 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004112344291041297, + "loss": 1.9727, + "step": 2809 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004109109822022598, + "loss": 2.0518, + "step": 2810 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004105875737994244, + "loss": 1.9961, + "step": 2811 + }, + { + "epoch": 0.57, + "learning_rate": 0.00041026420403538166, + "loss": 1.9365, + "step": 2812 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040994087304987325, + "loss": 1.9863, + "step": 2813 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004096175809826235, + "loss": 1.9189, + "step": 2814 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004092943279733402, + "loss": 1.9824, + "step": 2815 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040897111416171464, + "loss": 1.9092, + "step": 2816 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040864793968742053, + "loss": 2.0059, + "step": 2817 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004083248046901149, + "loss": 1.8975, + "step": 2818 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040800170930943764, + "loss": 1.9658, + "step": 2819 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004076786536850119, + "loss": 1.8955, + "step": 2820 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004073556379564429, + "loss": 1.8838, + "step": 2821 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004070326622633192, + "loss": 2.0234, + "step": 2822 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040670972674521213, + "loss": 1.9531, + "step": 2823 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040638683154167515, + "loss": 1.9766, + "step": 2824 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040606397679224455, + "loss": 1.9834, + "step": 2825 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004057411626364395, + "loss": 1.9883, + "step": 2826 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004054183892137611, + "loss": 1.8818, + "step": 2827 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004050956566636928, + "loss": 1.8994, + "step": 2828 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040477296512570095, + "loss": 1.9111, + "step": 2829 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004044503147392339, + "loss": 1.9688, + "step": 2830 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004041277056437218, + "loss": 2.0293, + "step": 2831 + }, + { + "epoch": 0.57, + "learning_rate": 0.00040380513797857765, + "loss": 1.9307, + "step": 2832 + }, + { + "epoch": 0.57, + "learning_rate": 0.0004034826118831963, + "loss": 1.8916, + "step": 2833 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004031601274969542, + "loss": 1.9941, + "step": 2834 + }, + { + "epoch": 0.58, + "learning_rate": 0.00040283768495921046, + "loss": 1.9355, + "step": 2835 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004025152844093056, + "loss": 2.0264, + "step": 2836 + }, + { + "epoch": 0.58, + "learning_rate": 0.00040219292598656244, + "loss": 2.0029, + "step": 2837 + }, + { + "epoch": 0.58, + "learning_rate": 0.00040187060983028477, + "loss": 1.9736, + "step": 2838 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004015483360797593, + "loss": 2.001, + "step": 2839 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004012261048742536, + "loss": 2.0254, + "step": 2840 + }, + { + "epoch": 0.58, + "learning_rate": 0.0004009039163530167, + "loss": 1.9893, + "step": 2841 + }, + { + "epoch": 0.58, + "learning_rate": 0.00040058177065528, + "loss": 2.0234, + "step": 2842 + }, + { + "epoch": 0.58, + "learning_rate": 0.00040025966792025586, + "loss": 2.0361, + "step": 2843 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039993760828713784, + "loss": 1.8809, + "step": 2844 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039961559189510154, + "loss": 2.0312, + "step": 2845 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039929361888330317, + "loss": 1.9844, + "step": 2846 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039897168939088067, + "loss": 1.9336, + "step": 2847 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039864980355695323, + "loss": 1.8994, + "step": 2848 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003983279615206208, + "loss": 1.9521, + "step": 2849 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003980061634209643, + "loss": 2.0059, + "step": 2850 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039768440939704663, + "loss": 2.002, + "step": 2851 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003973626995879105, + "loss": 1.9365, + "step": 2852 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003970410341325799, + "loss": 1.9609, + "step": 2853 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039671941317005976, + "loss": 2.0361, + "step": 2854 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003963978368393361, + "loss": 1.9775, + "step": 2855 + }, + { + "epoch": 0.58, + "learning_rate": 0.000396076305279375, + "loss": 1.9531, + "step": 2856 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003957548186291233, + "loss": 1.9609, + "step": 2857 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039543337702750914, + "loss": 1.9053, + "step": 2858 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039511198061344035, + "loss": 2.0459, + "step": 2859 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003947906295258054, + "loss": 1.9404, + "step": 2860 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039446932390347386, + "loss": 1.9482, + "step": 2861 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039414806388529477, + "loss": 1.9463, + "step": 2862 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003938268496100976, + "loss": 1.8555, + "step": 2863 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039350568121669275, + "loss": 1.9072, + "step": 2864 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003931845588438701, + "loss": 1.9307, + "step": 2865 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039286348263039975, + "loss": 2.0732, + "step": 2866 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003925424527150321, + "loss": 2.0098, + "step": 2867 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003922214692364974, + "loss": 1.9736, + "step": 2868 + }, + { + "epoch": 0.58, + "learning_rate": 0.00039190053233350577, + "loss": 2.0039, + "step": 2869 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003915796421447472, + "loss": 1.9395, + "step": 2870 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003912587988088918, + "loss": 1.9893, + "step": 2871 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003909380024645891, + "loss": 1.9736, + "step": 2872 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003906172532504681, + "loss": 2.0049, + "step": 2873 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003902965513051382, + "loss": 2.0938, + "step": 2874 + }, + { + "epoch": 0.58, + "learning_rate": 0.00038997589676718783, + "loss": 1.9072, + "step": 2875 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003896552897751846, + "loss": 2.0488, + "step": 2876 + }, + { + "epoch": 0.58, + "learning_rate": 0.00038933473046767655, + "loss": 1.9727, + "step": 2877 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003890142189831904, + "loss": 1.9395, + "step": 2878 + }, + { + "epoch": 0.58, + "learning_rate": 0.00038869375546023204, + "loss": 1.8867, + "step": 2879 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003883733400372875, + "loss": 1.9971, + "step": 2880 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003880529728528211, + "loss": 1.9824, + "step": 2881 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003877326540452767, + "loss": 2.0391, + "step": 2882 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003874123837530775, + "loss": 1.9521, + "step": 2883 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038709216211462534, + "loss": 1.9473, + "step": 2884 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003867719892683013, + "loss": 1.9844, + "step": 2885 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003864518653524648, + "loss": 1.9893, + "step": 2886 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003861317905054551, + "loss": 1.8955, + "step": 2887 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038581176486558966, + "loss": 1.9521, + "step": 2888 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003854917885711644, + "loss": 2.0098, + "step": 2889 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003851718617604547, + "loss": 2.043, + "step": 2890 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003848519845717138, + "loss": 1.9307, + "step": 2891 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003845321571431737, + "loss": 2.0244, + "step": 2892 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003842123796130455, + "loss": 1.9473, + "step": 2893 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003838926521195178, + "loss": 1.9531, + "step": 2894 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003835729748007582, + "loss": 1.9707, + "step": 2895 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038325334779491256, + "loss": 1.9746, + "step": 2896 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003829337712401048, + "loss": 1.9844, + "step": 2897 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003826142452744367, + "loss": 1.8311, + "step": 2898 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003822947700359891, + "loss": 1.9658, + "step": 2899 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003819753456628204, + "loss": 2.0146, + "step": 2900 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003816559722929667, + "loss": 1.9619, + "step": 2901 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038133665006444257, + "loss": 1.9072, + "step": 2902 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003810173791152405, + "loss": 1.9561, + "step": 2903 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003806981595833303, + "loss": 1.8965, + "step": 2904 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038037899160665986, + "loss": 1.9277, + "step": 2905 + }, + { + "epoch": 0.59, + "learning_rate": 0.00038005987532315513, + "loss": 1.9297, + "step": 2906 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037974081087071925, + "loss": 2.0049, + "step": 2907 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037942179838723296, + "loss": 1.9756, + "step": 2908 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037910283801055486, + "loss": 1.9609, + "step": 2909 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037878392987852097, + "loss": 1.9951, + "step": 2910 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003784650741289442, + "loss": 1.9971, + "step": 2911 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003781462708996157, + "loss": 1.9277, + "step": 2912 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037782752032830346, + "loss": 1.9404, + "step": 2913 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037750882255275245, + "loss": 1.9482, + "step": 2914 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037719017771068537, + "loss": 1.9512, + "step": 2915 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003768715859398016, + "loss": 1.9072, + "step": 2916 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003765530473777777, + "loss": 2.0596, + "step": 2917 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003762345621622677, + "loss": 1.8887, + "step": 2918 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037591613043090177, + "loss": 1.9492, + "step": 2919 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037559775232128765, + "loss": 1.9688, + "step": 2920 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003752794279710094, + "loss": 1.9482, + "step": 2921 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003749611575176282, + "loss": 1.9922, + "step": 2922 + }, + { + "epoch": 0.59, + "learning_rate": 0.000374642941098682, + "loss": 2.001, + "step": 2923 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003743247788516848, + "loss": 1.9033, + "step": 2924 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003740066709141281, + "loss": 2.0664, + "step": 2925 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037368861742347916, + "loss": 1.9805, + "step": 2926 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003733706185171819, + "loss": 1.9072, + "step": 2927 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003730526743326571, + "loss": 1.9502, + "step": 2928 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003727347850073012, + "loss": 1.916, + "step": 2929 + }, + { + "epoch": 0.59, + "learning_rate": 0.00037241695067848736, + "loss": 1.9717, + "step": 2930 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003720991714835651, + "loss": 1.917, + "step": 2931 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003717814475598597, + "loss": 2.0, + "step": 2932 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003714637790446726, + "loss": 1.9033, + "step": 2933 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003711461660752819, + "loss": 1.874, + "step": 2934 + }, + { + "epoch": 0.6, + "learning_rate": 0.000370828608788941, + "loss": 1.96, + "step": 2935 + }, + { + "epoch": 0.6, + "learning_rate": 0.00037051110732287933, + "loss": 1.959, + "step": 2936 + }, + { + "epoch": 0.6, + "learning_rate": 0.00037019366181430247, + "loss": 1.8506, + "step": 2937 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003698762724003919, + "loss": 1.9385, + "step": 2938 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003695589392183043, + "loss": 1.8965, + "step": 2939 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003692416624051727, + "loss": 1.8291, + "step": 2940 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003689244420981054, + "loss": 1.9014, + "step": 2941 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003686072784341864, + "loss": 1.9102, + "step": 2942 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036829017155047496, + "loss": 1.9756, + "step": 2943 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003679731215840063, + "loss": 1.9297, + "step": 2944 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003676561286717908, + "loss": 1.8652, + "step": 2945 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036733919295081375, + "loss": 1.9697, + "step": 2946 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036702231455803657, + "loss": 1.957, + "step": 2947 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003667054936303954, + "loss": 1.9062, + "step": 2948 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036638873030480127, + "loss": 2.04, + "step": 2949 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036607202471814106, + "loss": 1.8906, + "step": 2950 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036575537700727625, + "loss": 1.9287, + "step": 2951 + }, + { + "epoch": 0.6, + "learning_rate": 0.000365438787309043, + "loss": 1.9697, + "step": 2952 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036512225576025326, + "loss": 2.0273, + "step": 2953 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036480578249769305, + "loss": 1.916, + "step": 2954 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003644893676581237, + "loss": 1.998, + "step": 2955 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036417301137828067, + "loss": 2.0264, + "step": 2956 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036385671379487495, + "loss": 1.9668, + "step": 2957 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003635404750445918, + "loss": 1.9238, + "step": 2958 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003632242952640906, + "loss": 2.0293, + "step": 2959 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003629081745900062, + "loss": 1.8926, + "step": 2960 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003625921131589469, + "loss": 1.9453, + "step": 2961 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036227611110749595, + "loss": 1.9082, + "step": 2962 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036196016857221113, + "loss": 1.8994, + "step": 2963 + }, + { + "epoch": 0.6, + "learning_rate": 0.000361644285689624, + "loss": 1.96, + "step": 2964 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003613284625962405, + "loss": 2.0498, + "step": 2965 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003610126994285411, + "loss": 2.0303, + "step": 2966 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036069699632297977, + "loss": 1.9336, + "step": 2967 + }, + { + "epoch": 0.6, + "learning_rate": 0.00036038135341598485, + "loss": 1.9756, + "step": 2968 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003600657708439589, + "loss": 1.8848, + "step": 2969 + }, + { + "epoch": 0.6, + "learning_rate": 0.00035975024874327797, + "loss": 2.0605, + "step": 2970 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003594347872502921, + "loss": 1.9443, + "step": 2971 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003591193865013251, + "loss": 1.958, + "step": 2972 + }, + { + "epoch": 0.6, + "learning_rate": 0.00035880404663267515, + "loss": 1.96, + "step": 2973 + }, + { + "epoch": 0.6, + "learning_rate": 0.000358488767780613, + "loss": 1.9795, + "step": 2974 + }, + { + "epoch": 0.6, + "learning_rate": 0.00035817355008138386, + "loss": 1.9512, + "step": 2975 + }, + { + "epoch": 0.6, + "learning_rate": 0.00035785839367120643, + "loss": 2.0078, + "step": 2976 + }, + { + "epoch": 0.6, + "learning_rate": 0.00035754329868627254, + "loss": 1.9883, + "step": 2977 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003572282652627477, + "loss": 2.0137, + "step": 2978 + }, + { + "epoch": 0.6, + "learning_rate": 0.00035691329353677116, + "loss": 1.9551, + "step": 2979 + }, + { + "epoch": 0.6, + "learning_rate": 0.00035659838364445503, + "loss": 2.0137, + "step": 2980 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003562835357218845, + "loss": 1.9004, + "step": 2981 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035596874990511876, + "loss": 1.9355, + "step": 2982 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003556540263301896, + "loss": 1.9521, + "step": 2983 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035533936513310185, + "loss": 1.834, + "step": 2984 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035502476644983384, + "loss": 1.9629, + "step": 2985 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035471023041633667, + "loss": 2.0791, + "step": 2986 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003543957571685341, + "loss": 1.9043, + "step": 2987 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003540813468423229, + "loss": 1.96, + "step": 2988 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035376699957357293, + "loss": 2.0117, + "step": 2989 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003534527154981267, + "loss": 1.9971, + "step": 2990 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003531384947517989, + "loss": 1.9678, + "step": 2991 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035282433747037755, + "loss": 1.8291, + "step": 2992 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003525102437896231, + "loss": 1.9473, + "step": 2993 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003521962138452679, + "loss": 2.002, + "step": 2994 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035188224777301763, + "loss": 1.9521, + "step": 2995 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003515683457085499, + "loss": 1.9482, + "step": 2996 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003512545077875145, + "loss": 1.917, + "step": 2997 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003509407341455341, + "loss": 1.9102, + "step": 2998 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003506270249182028, + "loss": 1.9844, + "step": 2999 + }, + { + "epoch": 0.61, + "learning_rate": 0.00035031338024108747, + "loss": 1.9014, + "step": 3000 + }, + { + "epoch": 0.61, + "learning_rate": 0.000349999800249727, + "loss": 1.9805, + "step": 3001 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034968628507963214, + "loss": 1.9336, + "step": 3002 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003493728348662855, + "loss": 1.9121, + "step": 3003 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034905944974514226, + "loss": 1.9316, + "step": 3004 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003487461298516288, + "loss": 1.9375, + "step": 3005 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003484328753211435, + "loss": 1.915, + "step": 3006 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034811968628905643, + "loss": 2.0566, + "step": 3007 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003478065628907099, + "loss": 2.0479, + "step": 3008 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034749350526141706, + "loss": 2.0234, + "step": 3009 + }, + { + "epoch": 0.61, + "learning_rate": 0.000347180513536463, + "loss": 1.9297, + "step": 3010 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003468675878511047, + "loss": 1.9336, + "step": 3011 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003465547283405699, + "loss": 1.9375, + "step": 3012 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034624193514005806, + "loss": 1.9102, + "step": 3013 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003459292083847404, + "loss": 1.8984, + "step": 3014 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003456165482097587, + "loss": 1.8281, + "step": 3015 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034530395475022613, + "loss": 1.9619, + "step": 3016 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034499142814122766, + "loss": 1.9365, + "step": 3017 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034467896851781863, + "loss": 1.9619, + "step": 3018 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034436657601502575, + "loss": 1.9922, + "step": 3019 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003440542507678467, + "loss": 1.9893, + "step": 3020 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003437419929112503, + "loss": 1.9268, + "step": 3021 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034342980258017573, + "loss": 1.9854, + "step": 3022 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034311767990953323, + "loss": 1.9424, + "step": 3023 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003428056250342042, + "loss": 2.0234, + "step": 3024 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003424936380890403, + "loss": 1.9336, + "step": 3025 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034218171920886363, + "loss": 1.9619, + "step": 3026 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003418698685284676, + "loss": 1.874, + "step": 3027 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003415580861826156, + "loss": 2.0596, + "step": 3028 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003412463723060415, + "loss": 2.0312, + "step": 3029 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034093472703344987, + "loss": 1.8945, + "step": 3030 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003406231504995155, + "loss": 2.0049, + "step": 3031 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003403116428388832, + "loss": 1.9336, + "step": 3032 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003400002041861685, + "loss": 1.9434, + "step": 3033 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033968883467595676, + "loss": 1.8623, + "step": 3034 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003393775344428035, + "loss": 2.0156, + "step": 3035 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033906630362123466, + "loss": 1.9443, + "step": 3036 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033875514234574553, + "loss": 2.001, + "step": 3037 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003384440507508022, + "loss": 1.9639, + "step": 3038 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003381330289708395, + "loss": 1.9609, + "step": 3039 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033782207714026324, + "loss": 1.8701, + "step": 3040 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003375111953934485, + "loss": 2.0059, + "step": 3041 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003372003838647398, + "loss": 1.9541, + "step": 3042 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033688964268845204, + "loss": 1.9082, + "step": 3043 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003365789719988689, + "loss": 1.916, + "step": 3044 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033626837193024395, + "loss": 1.8643, + "step": 3045 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003359578426168007, + "loss": 1.9746, + "step": 3046 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003356473841927313, + "loss": 1.9297, + "step": 3047 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003353369967921976, + "loss": 1.9795, + "step": 3048 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033502668054933107, + "loss": 1.9932, + "step": 3049 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033471643559823206, + "loss": 1.9082, + "step": 3050 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033440626207296986, + "loss": 1.7656, + "step": 3051 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033409616010758353, + "loss": 2.0488, + "step": 3052 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033378612983608105, + "loss": 1.9609, + "step": 3053 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003334761713924388, + "loss": 1.9746, + "step": 3054 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033316628491060295, + "loss": 1.8984, + "step": 3055 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003328564705244883, + "loss": 1.9336, + "step": 3056 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003325467283679781, + "loss": 1.9893, + "step": 3057 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033223705857492473, + "loss": 1.8486, + "step": 3058 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003319274612791496, + "loss": 1.9199, + "step": 3059 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033161793661444223, + "loss": 1.9834, + "step": 3060 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033130848471456075, + "loss": 1.9639, + "step": 3061 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003309991057132324, + "loss": 1.8223, + "step": 3062 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003306897997441526, + "loss": 2.0068, + "step": 3063 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033038056694098485, + "loss": 1.9941, + "step": 3064 + }, + { + "epoch": 0.62, + "learning_rate": 0.00033007140743736177, + "loss": 1.9297, + "step": 3065 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003297623213668839, + "loss": 1.9297, + "step": 3066 + }, + { + "epoch": 0.62, + "learning_rate": 0.00032945330886311966, + "loss": 1.9766, + "step": 3067 + }, + { + "epoch": 0.62, + "learning_rate": 0.00032914437005960657, + "loss": 2.0518, + "step": 3068 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003288355050898495, + "loss": 1.9619, + "step": 3069 + }, + { + "epoch": 0.62, + "learning_rate": 0.00032852671408732176, + "loss": 1.9209, + "step": 3070 + }, + { + "epoch": 0.62, + "learning_rate": 0.00032821799718546496, + "loss": 1.9941, + "step": 3071 + }, + { + "epoch": 0.62, + "learning_rate": 0.00032790935451768797, + "loss": 1.8916, + "step": 3072 + }, + { + "epoch": 0.62, + "learning_rate": 0.00032760078621736833, + "loss": 1.9941, + "step": 3073 + }, + { + "epoch": 0.62, + "learning_rate": 0.00032729229241785086, + "loss": 1.9082, + "step": 3074 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003269838732524485, + "loss": 2.0098, + "step": 3075 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003266755288544422, + "loss": 2.0322, + "step": 3076 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003263672593570797, + "loss": 1.9268, + "step": 3077 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003260590648935775, + "loss": 1.9199, + "step": 3078 + }, + { + "epoch": 0.62, + "learning_rate": 0.00032575094559711876, + "loss": 1.8594, + "step": 3079 + }, + { + "epoch": 0.63, + "learning_rate": 0.00032544290160085453, + "loss": 2.04, + "step": 3080 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003251349330379037, + "loss": 1.9619, + "step": 3081 + }, + { + "epoch": 0.63, + "learning_rate": 0.00032482704004135177, + "loss": 1.8682, + "step": 3082 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003245192227442519, + "loss": 2.0078, + "step": 3083 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003242114812796252, + "loss": 1.9697, + "step": 3084 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003239038157804589, + "loss": 1.9043, + "step": 3085 + }, + { + "epoch": 0.63, + "learning_rate": 0.00032359622637970805, + "loss": 2.0049, + "step": 3086 + }, + { + "epoch": 0.63, + "learning_rate": 0.00032328871321029494, + "loss": 1.9941, + "step": 3087 + }, + { + "epoch": 0.63, + "learning_rate": 0.00032298127640510845, + "loss": 1.9053, + "step": 3088 + }, + { + "epoch": 0.63, + "learning_rate": 0.00032267391609700467, + "loss": 1.8271, + "step": 3089 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003223666324188067, + "loss": 1.9736, + "step": 3090 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003220594255033046, + "loss": 1.9824, + "step": 3091 + }, + { + "epoch": 0.63, + "learning_rate": 0.00032175229548325496, + "loss": 1.9805, + "step": 3092 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003214452424913813, + "loss": 1.9551, + "step": 3093 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003211382666603741, + "loss": 1.999, + "step": 3094 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003208313681228902, + "loss": 1.9248, + "step": 3095 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003205245470115527, + "loss": 2.043, + "step": 3096 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003202178034589521, + "loss": 1.8545, + "step": 3097 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003199111375976449, + "loss": 2.0273, + "step": 3098 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003196045495601537, + "loss": 1.9453, + "step": 3099 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003192980394789682, + "loss": 1.8779, + "step": 3100 + }, + { + "epoch": 0.63, + "learning_rate": 0.000318991607486544, + "loss": 1.9912, + "step": 3101 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003186852537153026, + "loss": 1.9541, + "step": 3102 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031837897829763254, + "loss": 1.9824, + "step": 3103 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003180727813658879, + "loss": 1.9551, + "step": 3104 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003177666630523888, + "loss": 1.8848, + "step": 3105 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031746062348942186, + "loss": 1.8906, + "step": 3106 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031715466280923924, + "loss": 1.9424, + "step": 3107 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003168487811440591, + "loss": 1.9658, + "step": 3108 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003165429786260655, + "loss": 1.9346, + "step": 3109 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003162372553874085, + "loss": 1.9248, + "step": 3110 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003159316115602036, + "loss": 1.9004, + "step": 3111 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031562604727653187, + "loss": 1.96, + "step": 3112 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003153205626684407, + "loss": 1.9609, + "step": 3113 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003150151578679422, + "loss": 1.9326, + "step": 3114 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031470983300701426, + "loss": 2.0518, + "step": 3115 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003144045882176009, + "loss": 1.7979, + "step": 3116 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031409942363161034, + "loss": 1.9482, + "step": 3117 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003137943393809169, + "loss": 1.96, + "step": 3118 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003134893355973606, + "loss": 1.8691, + "step": 3119 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031318441241274544, + "loss": 1.8799, + "step": 3120 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003128795699588416, + "loss": 1.9668, + "step": 3121 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003125748083673842, + "loss": 1.9355, + "step": 3122 + }, + { + "epoch": 0.63, + "learning_rate": 0.000312270127770073, + "loss": 1.9932, + "step": 3123 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031196552829857317, + "loss": 1.9189, + "step": 3124 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003116610100845145, + "loss": 1.9854, + "step": 3125 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003113565732594923, + "loss": 2.0586, + "step": 3126 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003110522179550658, + "loss": 1.8857, + "step": 3127 + }, + { + "epoch": 0.63, + "learning_rate": 0.00031074794430275963, + "loss": 1.9287, + "step": 3128 + }, + { + "epoch": 0.64, + "learning_rate": 0.00031044375243406317, + "loss": 1.9912, + "step": 3129 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003101396424804301, + "loss": 2.0156, + "step": 3130 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030983561457327856, + "loss": 1.8564, + "step": 3131 + }, + { + "epoch": 0.64, + "learning_rate": 0.000309531668843992, + "loss": 1.8896, + "step": 3132 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003092278054239178, + "loss": 1.9521, + "step": 3133 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030892402444436756, + "loss": 1.9082, + "step": 3134 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030862032603661785, + "loss": 1.8584, + "step": 3135 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003083167103319093, + "loss": 1.9912, + "step": 3136 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003080131774614464, + "loss": 1.917, + "step": 3137 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003077097275563985, + "loss": 1.8525, + "step": 3138 + }, + { + "epoch": 0.64, + "learning_rate": 0.000307406360747899, + "loss": 1.9834, + "step": 3139 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030710307716704476, + "loss": 1.9199, + "step": 3140 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030679987694489725, + "loss": 1.8945, + "step": 3141 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030649676021248185, + "loss": 2.04, + "step": 3142 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030619372710078785, + "loss": 2.0303, + "step": 3143 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030589077774076807, + "loss": 1.9375, + "step": 3144 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003055879122633397, + "loss": 1.999, + "step": 3145 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030528513079938334, + "loss": 1.9473, + "step": 3146 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030498243347974307, + "loss": 2.0107, + "step": 3147 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003046798204352272, + "loss": 1.958, + "step": 3148 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030437729179660727, + "loss": 1.9824, + "step": 3149 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030407484769461805, + "loss": 1.9785, + "step": 3150 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003037724882599585, + "loss": 2.0459, + "step": 3151 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003034702136232903, + "loss": 1.8926, + "step": 3152 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003031680239152388, + "loss": 1.9297, + "step": 3153 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030286591926639286, + "loss": 1.9434, + "step": 3154 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003025638998073041, + "loss": 2.0234, + "step": 3155 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003022619656684875, + "loss": 1.9102, + "step": 3156 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003019601169804216, + "loss": 1.8672, + "step": 3157 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030165835387354744, + "loss": 1.9951, + "step": 3158 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003013566764782692, + "loss": 1.9033, + "step": 3159 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030105508492495406, + "loss": 1.9404, + "step": 3160 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030075357934393254, + "loss": 1.9668, + "step": 3161 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030045215986549724, + "loss": 1.874, + "step": 3162 + }, + { + "epoch": 0.64, + "learning_rate": 0.00030015082661990393, + "loss": 2.0645, + "step": 3163 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002998495797373715, + "loss": 1.8867, + "step": 3164 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002995484193480808, + "loss": 1.9775, + "step": 3165 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002992473455821756, + "loss": 1.8818, + "step": 3166 + }, + { + "epoch": 0.64, + "learning_rate": 0.00029894635856976266, + "loss": 1.9102, + "step": 3167 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002986454584409106, + "loss": 2.124, + "step": 3168 + }, + { + "epoch": 0.64, + "learning_rate": 0.00029834464532565053, + "loss": 1.8789, + "step": 3169 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002980439193539766, + "loss": 1.9541, + "step": 3170 + }, + { + "epoch": 0.64, + "learning_rate": 0.00029774328065584475, + "loss": 1.9609, + "step": 3171 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002974427293611732, + "loss": 1.8945, + "step": 3172 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002971422655998428, + "loss": 1.9717, + "step": 3173 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002968418895016962, + "loss": 1.9209, + "step": 3174 + }, + { + "epoch": 0.64, + "learning_rate": 0.00029654160119653835, + "loss": 1.8916, + "step": 3175 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002962414008141359, + "loss": 1.8945, + "step": 3176 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002959412884842181, + "loss": 1.916, + "step": 3177 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029564126433647586, + "loss": 2.0254, + "step": 3178 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029534132850056173, + "loss": 1.9297, + "step": 3179 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029504148110609063, + "loss": 1.916, + "step": 3180 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029474172228263887, + "loss": 2.0625, + "step": 3181 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002944420521597444, + "loss": 1.8877, + "step": 3182 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029414247086690726, + "loss": 1.8398, + "step": 3183 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029384297853358895, + "loss": 1.957, + "step": 3184 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029354357528921205, + "loss": 1.9512, + "step": 3185 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002932442612631614, + "loss": 1.9375, + "step": 3186 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029294503658478275, + "loss": 1.9941, + "step": 3187 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002926459013833834, + "loss": 1.8691, + "step": 3188 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002923468557882322, + "loss": 1.9346, + "step": 3189 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029204789992855884, + "loss": 1.9629, + "step": 3190 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029174903393355454, + "loss": 1.9648, + "step": 3191 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029145025793237143, + "loss": 1.8799, + "step": 3192 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029115157205412333, + "loss": 1.9316, + "step": 3193 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029085297642788454, + "loss": 1.9629, + "step": 3194 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002905544711826903, + "loss": 2.0234, + "step": 3195 + }, + { + "epoch": 0.65, + "learning_rate": 0.00029025605644753737, + "loss": 1.9854, + "step": 3196 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002899577323513829, + "loss": 1.957, + "step": 3197 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028965949902314475, + "loss": 1.9248, + "step": 3198 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028936135659170215, + "loss": 2.04, + "step": 3199 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002890633051858948, + "loss": 2.0146, + "step": 3200 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028876534493452264, + "loss": 1.875, + "step": 3201 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028846747596634705, + "loss": 1.9346, + "step": 3202 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002881696984100891, + "loss": 1.9062, + "step": 3203 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028787201239443075, + "loss": 1.9727, + "step": 3204 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028757441804801463, + "loss": 2.0391, + "step": 3205 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028727691549944323, + "loss": 1.8447, + "step": 3206 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028697950487728, + "loss": 1.9795, + "step": 3207 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028668218631004786, + "loss": 1.9102, + "step": 3208 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028638495992623094, + "loss": 1.9688, + "step": 3209 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028608782585427275, + "loss": 1.9473, + "step": 3210 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028579078422257686, + "loss": 1.9014, + "step": 3211 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002854938351595078, + "loss": 2.0547, + "step": 3212 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002851969787933889, + "loss": 1.9756, + "step": 3213 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028490021525250434, + "loss": 1.9531, + "step": 3214 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002846035446650981, + "loss": 1.9268, + "step": 3215 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002843069671593734, + "loss": 1.9297, + "step": 3216 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002840104828634935, + "loss": 1.9102, + "step": 3217 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028371409190558196, + "loss": 1.9336, + "step": 3218 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028341779441372095, + "loss": 1.9385, + "step": 3219 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028312159051595314, + "loss": 1.9756, + "step": 3220 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002828254803402807, + "loss": 1.8213, + "step": 3221 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028252946401466464, + "loss": 1.9238, + "step": 3222 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002822335416670257, + "loss": 1.916, + "step": 3223 + }, + { + "epoch": 0.65, + "learning_rate": 0.00028193771342524456, + "loss": 2.0801, + "step": 3224 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002816419794171605, + "loss": 1.8779, + "step": 3225 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002813463397705723, + "loss": 1.96, + "step": 3226 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002810507946132382, + "loss": 2.0947, + "step": 3227 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002807553440728755, + "loss": 1.9023, + "step": 3228 + }, + { + "epoch": 0.66, + "learning_rate": 0.00028045998827716046, + "loss": 1.9277, + "step": 3229 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002801647273537283, + "loss": 1.9502, + "step": 3230 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027986956143017386, + "loss": 1.9629, + "step": 3231 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002795744906340501, + "loss": 1.8574, + "step": 3232 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027927951509286924, + "loss": 1.9424, + "step": 3233 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027898463493410255, + "loss": 1.998, + "step": 3234 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002786898502851801, + "loss": 2.0322, + "step": 3235 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027839516127348997, + "loss": 1.9121, + "step": 3236 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027810056802637993, + "loss": 2.0059, + "step": 3237 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027780607067115563, + "loss": 1.9287, + "step": 3238 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002775116693350813, + "loss": 1.9541, + "step": 3239 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027721736414538037, + "loss": 1.9619, + "step": 3240 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002769231552292336, + "loss": 1.957, + "step": 3241 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002766290427137815, + "loss": 1.9268, + "step": 3242 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027633502672612154, + "loss": 1.875, + "step": 3243 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002760411073933107, + "loss": 1.9707, + "step": 3244 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002757472848423633, + "loss": 1.9678, + "step": 3245 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002754535592002522, + "loss": 1.9023, + "step": 3246 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002751599305939082, + "loss": 1.9316, + "step": 3247 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002748663991502208, + "loss": 1.9414, + "step": 3248 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002745729649960366, + "loss": 1.9453, + "step": 3249 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002742796282581609, + "loss": 1.9023, + "step": 3250 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027398638906335634, + "loss": 1.959, + "step": 3251 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002736932475383436, + "loss": 1.8574, + "step": 3252 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002734002038098015, + "loss": 1.9395, + "step": 3253 + }, + { + "epoch": 0.66, + "learning_rate": 0.000273107258004366, + "loss": 1.9141, + "step": 3254 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027281441024863115, + "loss": 1.9932, + "step": 3255 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027252166066914874, + "loss": 1.9688, + "step": 3256 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027222900939242783, + "loss": 1.9014, + "step": 3257 + }, + { + "epoch": 0.66, + "learning_rate": 0.000271936456544935, + "loss": 1.8887, + "step": 3258 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002716440022530942, + "loss": 1.9893, + "step": 3259 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002713516466432874, + "loss": 1.8916, + "step": 3260 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027105938984185306, + "loss": 1.9189, + "step": 3261 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002707672319750877, + "loss": 1.9521, + "step": 3262 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002704751731692448, + "loss": 1.8662, + "step": 3263 + }, + { + "epoch": 0.66, + "learning_rate": 0.00027018321355053485, + "loss": 1.9541, + "step": 3264 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002698913532451256, + "loss": 1.9541, + "step": 3265 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026959959237914214, + "loss": 1.9512, + "step": 3266 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002693079310786661, + "loss": 1.9531, + "step": 3267 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002690163694697363, + "loss": 1.8877, + "step": 3268 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026872490767834863, + "loss": 1.9551, + "step": 3269 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026843354583045597, + "loss": 1.9854, + "step": 3270 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002681422840519674, + "loss": 1.8809, + "step": 3271 + }, + { + "epoch": 0.66, + "learning_rate": 0.00026785112246874944, + "loss": 1.9395, + "step": 3272 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002675600612066249, + "loss": 1.8477, + "step": 3273 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002672691003913732, + "loss": 1.9375, + "step": 3274 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002669782401487307, + "loss": 1.874, + "step": 3275 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002666874806043899, + "loss": 1.8789, + "step": 3276 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026639682188400016, + "loss": 1.9863, + "step": 3277 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026610626411316683, + "loss": 1.9453, + "step": 3278 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026581580741745226, + "loss": 1.8555, + "step": 3279 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002655254519223746, + "loss": 1.8975, + "step": 3280 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002652351977534082, + "loss": 1.8818, + "step": 3281 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026494504503598394, + "loss": 1.9121, + "step": 3282 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002646549938954891, + "loss": 1.9287, + "step": 3283 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002643650444572663, + "loss": 1.8633, + "step": 3284 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026407519684661497, + "loss": 1.9326, + "step": 3285 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026378545118879005, + "loss": 1.9326, + "step": 3286 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026349580760900233, + "loss": 1.9004, + "step": 3287 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002632062662324192, + "loss": 1.9443, + "step": 3288 + }, + { + "epoch": 0.67, + "learning_rate": 0.000262916827184163, + "loss": 1.9492, + "step": 3289 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002626274905893125, + "loss": 1.8955, + "step": 3290 + }, + { + "epoch": 0.67, + "learning_rate": 0.000262338256572902, + "loss": 1.9795, + "step": 3291 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026204912525992134, + "loss": 1.8789, + "step": 3292 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026176009677531616, + "loss": 1.9502, + "step": 3293 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026147117124398724, + "loss": 1.9238, + "step": 3294 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002611823487907917, + "loss": 1.9131, + "step": 3295 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026089362954054117, + "loss": 1.9033, + "step": 3296 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026060501361800333, + "loss": 1.9717, + "step": 3297 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002603165011479012, + "loss": 1.8662, + "step": 3298 + }, + { + "epoch": 0.67, + "learning_rate": 0.00026002809225491273, + "loss": 1.999, + "step": 3299 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025973978706367106, + "loss": 2.0869, + "step": 3300 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025945158569876526, + "loss": 1.9951, + "step": 3301 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025916348828473857, + "loss": 1.8887, + "step": 3302 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025887549494608975, + "loss": 1.9062, + "step": 3303 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025858760580727274, + "loss": 1.9678, + "step": 3304 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002582998209926965, + "loss": 1.9297, + "step": 3305 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025801214062672425, + "loss": 1.9668, + "step": 3306 + }, + { + "epoch": 0.67, + "learning_rate": 0.000257724564833675, + "loss": 1.8633, + "step": 3307 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025743709373782177, + "loss": 1.9277, + "step": 3308 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002571497274633926, + "loss": 1.9492, + "step": 3309 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025686246613457046, + "loss": 1.8955, + "step": 3310 + }, + { + "epoch": 0.67, + "learning_rate": 0.000256575309875493, + "loss": 1.8516, + "step": 3311 + }, + { + "epoch": 0.67, + "learning_rate": 0.000256288258810252, + "loss": 1.9814, + "step": 3312 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025600131306289397, + "loss": 1.9014, + "step": 3313 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002557144727574203, + "loss": 2.0, + "step": 3314 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002554277380177864, + "loss": 1.8975, + "step": 3315 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025514110896790183, + "loss": 1.8975, + "step": 3316 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002548545857316311, + "loss": 1.9199, + "step": 3317 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002545681684327928, + "loss": 1.9678, + "step": 3318 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025428185719515945, + "loss": 1.874, + "step": 3319 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002539956521424581, + "loss": 1.9053, + "step": 3320 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002537095533983697, + "loss": 1.9258, + "step": 3321 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025342356108652903, + "loss": 1.9883, + "step": 3322 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025313767533052556, + "loss": 1.959, + "step": 3323 + }, + { + "epoch": 0.67, + "learning_rate": 0.00025285189625390194, + "loss": 2.0654, + "step": 3324 + }, + { + "epoch": 0.67, + "learning_rate": 0.0002525662239801553, + "loss": 1.9336, + "step": 3325 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002522806586327364, + "loss": 1.916, + "step": 3326 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025199520033504985, + "loss": 1.9766, + "step": 3327 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025170984921045383, + "loss": 1.9043, + "step": 3328 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002514246053822601, + "loss": 1.9932, + "step": 3329 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002511394689737346, + "loss": 1.9307, + "step": 3330 + }, + { + "epoch": 0.68, + "learning_rate": 0.00025085444010809635, + "loss": 1.9707, + "step": 3331 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002505695189085181, + "loss": 1.9053, + "step": 3332 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002502847054981262, + "loss": 1.9365, + "step": 3333 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002500000000000001, + "loss": 1.9053, + "step": 3334 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002497154025371726, + "loss": 1.9062, + "step": 3335 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002494309132326304, + "loss": 1.9678, + "step": 3336 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002491465322093128, + "loss": 1.8613, + "step": 3337 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024886225959011246, + "loss": 1.9395, + "step": 3338 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002485780954978753, + "loss": 1.9258, + "step": 3339 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002482940400554007, + "loss": 1.832, + "step": 3340 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024801009338544023, + "loss": 1.9277, + "step": 3341 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002477262556106994, + "loss": 1.9385, + "step": 3342 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024744252685383596, + "loss": 1.8115, + "step": 3343 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002471589072374606, + "loss": 1.8652, + "step": 3344 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024687539688413723, + "loss": 1.9062, + "step": 3345 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024659199591638267, + "loss": 1.9131, + "step": 3346 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024630870445666573, + "loss": 1.9268, + "step": 3347 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024602552262740834, + "loss": 1.9121, + "step": 3348 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002457424505509853, + "loss": 1.9121, + "step": 3349 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002454594883497235, + "loss": 1.9736, + "step": 3350 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002451766361459025, + "loss": 1.9668, + "step": 3351 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002448938940617546, + "loss": 1.9688, + "step": 3352 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024461126221946423, + "loss": 1.8438, + "step": 3353 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024432874074116815, + "loss": 1.9883, + "step": 3354 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024404632974895575, + "loss": 1.9629, + "step": 3355 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002437640293648683, + "loss": 1.9473, + "step": 3356 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024348183971089927, + "loss": 1.9072, + "step": 3357 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024319976090899482, + "loss": 1.9189, + "step": 3358 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024291779308105237, + "loss": 1.9287, + "step": 3359 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024263593634892233, + "loss": 2.0117, + "step": 3360 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024235419083440613, + "loss": 1.8906, + "step": 3361 + }, + { + "epoch": 0.68, + "learning_rate": 0.000242072556659258, + "loss": 1.8584, + "step": 3362 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024179103394518349, + "loss": 1.9131, + "step": 3363 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024150962281384005, + "loss": 1.9004, + "step": 3364 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024122832338683726, + "loss": 1.9658, + "step": 3365 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002409471357857359, + "loss": 1.8203, + "step": 3366 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002406660601320489, + "loss": 1.8594, + "step": 3367 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024038509654724077, + "loss": 2.0225, + "step": 3368 + }, + { + "epoch": 0.68, + "learning_rate": 0.00024010424515272732, + "loss": 1.9385, + "step": 3369 + }, + { + "epoch": 0.68, + "learning_rate": 0.00023982350606987585, + "loss": 1.9697, + "step": 3370 + }, + { + "epoch": 0.68, + "learning_rate": 0.00023954287942000552, + "loss": 1.9648, + "step": 3371 + }, + { + "epoch": 0.68, + "learning_rate": 0.00023926236532438645, + "loss": 1.9346, + "step": 3372 + }, + { + "epoch": 0.68, + "learning_rate": 0.00023898196390424042, + "loss": 2.0107, + "step": 3373 + }, + { + "epoch": 0.68, + "learning_rate": 0.00023870167528074066, + "loss": 1.8604, + "step": 3374 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002384214995750112, + "loss": 1.9443, + "step": 3375 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002381414369081274, + "loss": 1.958, + "step": 3376 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023786148740111618, + "loss": 1.8877, + "step": 3377 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023758165117495505, + "loss": 1.957, + "step": 3378 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002373019283505727, + "loss": 1.9414, + "step": 3379 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023702231904884895, + "loss": 1.8691, + "step": 3380 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023674282339061477, + "loss": 2.0078, + "step": 3381 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002364634414966515, + "loss": 1.9258, + "step": 3382 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023618417348769138, + "loss": 1.9043, + "step": 3383 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023590501948441805, + "loss": 1.8379, + "step": 3384 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023562597960746524, + "loss": 1.9941, + "step": 3385 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023534705397741735, + "loss": 1.8428, + "step": 3386 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023506824271480987, + "loss": 1.8096, + "step": 3387 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002347895459401288, + "loss": 1.9336, + "step": 3388 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023451096377381014, + "loss": 1.8682, + "step": 3389 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023423249633624106, + "loss": 2.0273, + "step": 3390 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023395414374775865, + "loss": 1.9238, + "step": 3391 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023367590612865036, + "loss": 1.8975, + "step": 3392 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002333977835991545, + "loss": 1.9121, + "step": 3393 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023311977627945885, + "loss": 1.9033, + "step": 3394 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023284188428970222, + "loss": 1.8281, + "step": 3395 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023256410774997284, + "loss": 1.8721, + "step": 3396 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002322864467803097, + "loss": 1.9453, + "step": 3397 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023200890150070147, + "loss": 1.8477, + "step": 3398 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023173147203108663, + "loss": 1.96, + "step": 3399 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023145415849135426, + "loss": 1.8496, + "step": 3400 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023117696100134261, + "loss": 1.9766, + "step": 3401 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023089987968084036, + "loss": 1.916, + "step": 3402 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023062291464958585, + "loss": 1.9727, + "step": 3403 + }, + { + "epoch": 0.69, + "learning_rate": 0.000230346066027267, + "loss": 1.9355, + "step": 3404 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002300693339335212, + "loss": 1.8652, + "step": 3405 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002297927184879363, + "loss": 1.8809, + "step": 3406 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022951621981004872, + "loss": 1.9336, + "step": 3407 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022923983801934522, + "loss": 1.9316, + "step": 3408 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002289635732352619, + "loss": 1.9609, + "step": 3409 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022868742557718402, + "loss": 1.9111, + "step": 3410 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022841139516444626, + "loss": 1.9639, + "step": 3411 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002281354821163326, + "loss": 1.8564, + "step": 3412 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002278596865520768, + "loss": 1.8984, + "step": 3413 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022758400859086114, + "loss": 2.0312, + "step": 3414 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022730844835181757, + "loss": 1.8945, + "step": 3415 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002270330059540272, + "loss": 1.9619, + "step": 3416 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022675768151651993, + "loss": 1.9502, + "step": 3417 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022648247515827453, + "loss": 1.9072, + "step": 3418 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022620738699821936, + "loss": 2.0205, + "step": 3419 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002259324171552312, + "loss": 1.9053, + "step": 3420 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022565756574813574, + "loss": 1.8613, + "step": 3421 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002253828328957076, + "loss": 1.9746, + "step": 3422 + }, + { + "epoch": 0.69, + "learning_rate": 0.00022510821871667047, + "loss": 1.9854, + "step": 3423 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002248337233296961, + "loss": 1.8799, + "step": 3424 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022455934685340558, + "loss": 1.8652, + "step": 3425 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022428508940636804, + "loss": 1.8857, + "step": 3426 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022401095110710135, + "loss": 1.8896, + "step": 3427 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022373693207407226, + "loss": 1.9121, + "step": 3428 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022346303242569537, + "loss": 2.0449, + "step": 3429 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002231892522803343, + "loss": 1.9258, + "step": 3430 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022291559175630044, + "loss": 1.8936, + "step": 3431 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022264205097185407, + "loss": 1.9395, + "step": 3432 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022236863004520323, + "loss": 1.9082, + "step": 3433 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002220953290945043, + "loss": 1.9131, + "step": 3434 + }, + { + "epoch": 0.7, + "learning_rate": 0.000221822148237862, + "loss": 1.9844, + "step": 3435 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002215490875933292, + "loss": 1.8896, + "step": 3436 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022127614727890626, + "loss": 1.8848, + "step": 3437 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022100332741254236, + "loss": 1.9775, + "step": 3438 + }, + { + "epoch": 0.7, + "learning_rate": 0.000220730628112134, + "loss": 1.9473, + "step": 3439 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022045804949552555, + "loss": 1.8906, + "step": 3440 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022018559168050995, + "loss": 1.9248, + "step": 3441 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021991325478482694, + "loss": 1.9004, + "step": 3442 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021964103892616477, + "loss": 1.835, + "step": 3443 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002193689442221593, + "loss": 1.9688, + "step": 3444 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021909697079039363, + "loss": 1.8291, + "step": 3445 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021882511874839878, + "loss": 1.9551, + "step": 3446 + }, + { + "epoch": 0.7, + "learning_rate": 0.000218553388213653, + "loss": 1.8857, + "step": 3447 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021828177930358251, + "loss": 1.9297, + "step": 3448 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021801029213556052, + "loss": 1.9639, + "step": 3449 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021773892682690782, + "loss": 1.957, + "step": 3450 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021746768349489281, + "loss": 1.9707, + "step": 3451 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002171965622567308, + "loss": 1.8408, + "step": 3452 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021692556322958412, + "loss": 1.8887, + "step": 3453 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021665468653056307, + "loss": 1.8164, + "step": 3454 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021638393227672444, + "loss": 1.8701, + "step": 3455 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021611330058507206, + "loss": 1.8818, + "step": 3456 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002158427915725573, + "loss": 1.8828, + "step": 3457 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021557240535607835, + "loss": 2.0039, + "step": 3458 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021530214205247995, + "loss": 1.9863, + "step": 3459 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021503200177855426, + "loss": 1.8877, + "step": 3460 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002147619846510399, + "loss": 1.8652, + "step": 3461 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021449209078662224, + "loss": 1.8467, + "step": 3462 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021422232030193374, + "loss": 1.8848, + "step": 3463 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002139526733135534, + "loss": 1.9561, + "step": 3464 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021368314993800675, + "loss": 1.7588, + "step": 3465 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021341375029176562, + "loss": 1.9668, + "step": 3466 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021314447449124918, + "loss": 1.8691, + "step": 3467 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021287532265282239, + "loss": 1.8525, + "step": 3468 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002126062948927966, + "loss": 1.9512, + "step": 3469 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021233739132743007, + "loss": 1.9561, + "step": 3470 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021206861207292716, + "loss": 1.8857, + "step": 3471 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021179995724543823, + "loss": 1.915, + "step": 3472 + }, + { + "epoch": 0.7, + "learning_rate": 0.00021153142696106036, + "loss": 1.876, + "step": 3473 + }, + { + "epoch": 0.71, + "learning_rate": 0.00021126302133583647, + "loss": 1.877, + "step": 3474 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002109947404857554, + "loss": 1.9785, + "step": 3475 + }, + { + "epoch": 0.71, + "learning_rate": 0.00021072658452675274, + "loss": 1.793, + "step": 3476 + }, + { + "epoch": 0.71, + "learning_rate": 0.00021045855357470944, + "loss": 1.9277, + "step": 3477 + }, + { + "epoch": 0.71, + "learning_rate": 0.00021019064774545283, + "loss": 1.9092, + "step": 3478 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020992286715475623, + "loss": 1.9795, + "step": 3479 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020965521191833848, + "loss": 1.877, + "step": 3480 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002093876821518645, + "loss": 1.8721, + "step": 3481 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002091202779709447, + "loss": 1.9912, + "step": 3482 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020885299949113578, + "loss": 1.8994, + "step": 3483 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020858584682793946, + "loss": 1.9248, + "step": 3484 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002083188200968036, + "loss": 1.959, + "step": 3485 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020805191941312157, + "loss": 1.8428, + "step": 3486 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020778514489223205, + "loss": 1.9062, + "step": 3487 + }, + { + "epoch": 0.71, + "learning_rate": 0.000207518496649419, + "loss": 1.9434, + "step": 3488 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020725197479991265, + "loss": 1.9385, + "step": 3489 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020698557945888774, + "loss": 1.9229, + "step": 3490 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002067193107414645, + "loss": 1.835, + "step": 3491 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020645316876270886, + "loss": 1.9746, + "step": 3492 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002061871536376319, + "loss": 1.9258, + "step": 3493 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020592126548118933, + "loss": 1.958, + "step": 3494 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020565550440828278, + "loss": 1.9453, + "step": 3495 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002053898705337583, + "loss": 1.8408, + "step": 3496 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002051243639724071, + "loss": 1.8701, + "step": 3497 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002048589848389657, + "loss": 1.8574, + "step": 3498 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020459373324811548, + "loss": 1.9717, + "step": 3499 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020432860931448243, + "loss": 1.9238, + "step": 3500 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020406361315263732, + "loss": 1.8936, + "step": 3501 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020379874487709633, + "loss": 1.9053, + "step": 3502 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020353400460231975, + "loss": 1.9541, + "step": 3503 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020326939244271253, + "loss": 1.9238, + "step": 3504 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002030049085126247, + "loss": 1.9219, + "step": 3505 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020274055292635086, + "loss": 1.9238, + "step": 3506 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020247632579812957, + "loss": 1.8965, + "step": 3507 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002022122272421446, + "loss": 1.9043, + "step": 3508 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020194825737252366, + "loss": 1.873, + "step": 3509 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020168441630333877, + "loss": 1.9014, + "step": 3510 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020142070414860702, + "loss": 1.9707, + "step": 3511 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020115712102228877, + "loss": 1.9365, + "step": 3512 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020089366703828965, + "loss": 1.8818, + "step": 3513 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020063034231045852, + "loss": 1.9316, + "step": 3514 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020036714695258922, + "loss": 2.0107, + "step": 3515 + }, + { + "epoch": 0.71, + "learning_rate": 0.00020010408107841915, + "loss": 1.8604, + "step": 3516 + }, + { + "epoch": 0.71, + "learning_rate": 0.00019984114480162973, + "loss": 1.9785, + "step": 3517 + }, + { + "epoch": 0.71, + "learning_rate": 0.00019957833823584692, + "loss": 1.8213, + "step": 3518 + }, + { + "epoch": 0.71, + "learning_rate": 0.00019931566149463992, + "loss": 1.8486, + "step": 3519 + }, + { + "epoch": 0.71, + "learning_rate": 0.00019905311469152226, + "loss": 1.9648, + "step": 3520 + }, + { + "epoch": 0.71, + "learning_rate": 0.00019879069793995153, + "loss": 1.8408, + "step": 3521 + }, + { + "epoch": 0.71, + "learning_rate": 0.00019852841135332855, + "loss": 1.8604, + "step": 3522 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019826625504499806, + "loss": 1.9932, + "step": 3523 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001980042291282489, + "loss": 1.8242, + "step": 3524 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001977423337163129, + "loss": 1.9434, + "step": 3525 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019748056892236605, + "loss": 1.915, + "step": 3526 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019721893485952785, + "loss": 1.8662, + "step": 3527 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019695743164086104, + "loss": 1.9355, + "step": 3528 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019669605937937192, + "loss": 1.8926, + "step": 3529 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019643481818801002, + "loss": 1.8916, + "step": 3530 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019617370817966872, + "loss": 2.0068, + "step": 3531 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019591272946718413, + "loss": 1.9365, + "step": 3532 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019565188216333611, + "loss": 1.9648, + "step": 3533 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019539116638084758, + "loss": 1.9678, + "step": 3534 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001951305822323845, + "loss": 1.8018, + "step": 3535 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019487012983055585, + "loss": 2.0508, + "step": 3536 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019460980928791405, + "loss": 1.9414, + "step": 3537 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019434962071695434, + "loss": 1.9639, + "step": 3538 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019408956423011458, + "loss": 1.7695, + "step": 3539 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001938296399397762, + "loss": 1.9668, + "step": 3540 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019356984795826333, + "loss": 1.9014, + "step": 3541 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019331018839784236, + "loss": 1.9873, + "step": 3542 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001930506613707233, + "loss": 1.8232, + "step": 3543 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019279126698905835, + "loss": 1.8818, + "step": 3544 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001925320053649422, + "loss": 1.8994, + "step": 3545 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019227287661041293, + "loss": 1.8438, + "step": 3546 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019201388083745036, + "loss": 1.8203, + "step": 3547 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019175501815797759, + "loss": 1.8848, + "step": 3548 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001914962886838595, + "loss": 1.8564, + "step": 3549 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001912376925269041, + "loss": 1.9277, + "step": 3550 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019097922979886128, + "loss": 1.9355, + "step": 3551 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001907209006114232, + "loss": 1.874, + "step": 3552 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019046270507622505, + "loss": 2.0264, + "step": 3553 + }, + { + "epoch": 0.72, + "learning_rate": 0.00019020464330484323, + "loss": 1.918, + "step": 3554 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018994671540879704, + "loss": 1.874, + "step": 3555 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018968892149954809, + "loss": 1.9355, + "step": 3556 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018943126168849945, + "loss": 1.8711, + "step": 3557 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018917373608699634, + "loss": 1.9404, + "step": 3558 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018891634480632663, + "loss": 1.9219, + "step": 3559 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018865908795771924, + "loss": 1.958, + "step": 3560 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018840196565234575, + "loss": 2.0029, + "step": 3561 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018814497800131937, + "loss": 2.0322, + "step": 3562 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018788812511569492, + "loss": 1.9082, + "step": 3563 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001876314071064691, + "loss": 1.8994, + "step": 3564 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018737482408458012, + "loss": 2.0215, + "step": 3565 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018711837616090855, + "loss": 2.041, + "step": 3566 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001868620634462756, + "loss": 1.9834, + "step": 3567 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018660588605144484, + "loss": 1.8223, + "step": 3568 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018634984408712118, + "loss": 1.916, + "step": 3569 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018609393766395082, + "loss": 1.9482, + "step": 3570 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001858381668925212, + "loss": 1.8359, + "step": 3571 + }, + { + "epoch": 0.72, + "learning_rate": 0.00018558253188336176, + "loss": 1.916, + "step": 3572 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001853270327469428, + "loss": 1.9629, + "step": 3573 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018507166959367578, + "loss": 1.9688, + "step": 3574 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001848164425339139, + "loss": 1.9229, + "step": 3575 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018456135167795135, + "loss": 1.9492, + "step": 3576 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018430639713602314, + "loss": 1.9844, + "step": 3577 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018405157901830595, + "loss": 1.8994, + "step": 3578 + }, + { + "epoch": 0.73, + "learning_rate": 0.000183796897434917, + "loss": 1.8691, + "step": 3579 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018354235249591473, + "loss": 1.876, + "step": 3580 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018328794431129826, + "loss": 1.9092, + "step": 3581 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001830336729910081, + "loss": 1.8662, + "step": 3582 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018277953864492548, + "loss": 1.877, + "step": 3583 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018252554138287191, + "loss": 1.9521, + "step": 3584 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001822716813146106, + "loss": 1.7842, + "step": 3585 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001820179585498446, + "loss": 1.9863, + "step": 3586 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018176437319821786, + "loss": 1.874, + "step": 3587 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018151092536931523, + "loss": 1.7959, + "step": 3588 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001812576151726621, + "loss": 1.9062, + "step": 3589 + }, + { + "epoch": 0.73, + "learning_rate": 0.000181004442717724, + "loss": 1.8896, + "step": 3590 + }, + { + "epoch": 0.73, + "learning_rate": 0.00018075140811390738, + "loss": 1.8467, + "step": 3591 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001804985114705588, + "loss": 2.0049, + "step": 3592 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001802457528969652, + "loss": 1.835, + "step": 3593 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001799931325023542, + "loss": 1.9424, + "step": 3594 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017974065039589332, + "loss": 1.8838, + "step": 3595 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017948830668669047, + "loss": 1.7344, + "step": 3596 + }, + { + "epoch": 0.73, + "learning_rate": 0.000179236101483794, + "loss": 1.9717, + "step": 3597 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017898403489619198, + "loss": 1.8896, + "step": 3598 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017873210703281284, + "loss": 1.9502, + "step": 3599 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001784803180025247, + "loss": 1.873, + "step": 3600 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017822866791413638, + "loss": 1.9121, + "step": 3601 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017797715687639594, + "loss": 1.8994, + "step": 3602 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001777257849979918, + "loss": 1.916, + "step": 3603 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017747455238755223, + "loss": 1.8867, + "step": 3604 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017722345915364507, + "loss": 1.9668, + "step": 3605 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017697250540477788, + "loss": 1.9365, + "step": 3606 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001767216912493984, + "loss": 1.8994, + "step": 3607 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017647101679589373, + "loss": 1.8525, + "step": 3608 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017622048215259028, + "loss": 1.8984, + "step": 3609 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001759700874277546, + "loss": 1.8809, + "step": 3610 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017571983272959284, + "loss": 1.8457, + "step": 3611 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017546971816624995, + "loss": 1.9189, + "step": 3612 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017521974384581112, + "loss": 1.9316, + "step": 3613 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001749699098763003, + "loss": 1.9951, + "step": 3614 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017472021636568108, + "loss": 1.9102, + "step": 3615 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001744706634218562, + "loss": 1.9346, + "step": 3616 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017422125115266785, + "loss": 1.7627, + "step": 3617 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017397197966589756, + "loss": 1.9785, + "step": 3618 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017372284906926543, + "loss": 1.8164, + "step": 3619 + }, + { + "epoch": 0.73, + "learning_rate": 0.00017347385947043143, + "loss": 1.8447, + "step": 3620 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001732250109769941, + "loss": 1.8643, + "step": 3621 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017297630369649087, + "loss": 1.9375, + "step": 3622 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001727277377363986, + "loss": 1.8643, + "step": 3623 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017247931320413308, + "loss": 1.9355, + "step": 3624 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017223103020704838, + "loss": 1.9668, + "step": 3625 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017198288885243818, + "loss": 1.8525, + "step": 3626 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001717348892475345, + "loss": 1.8545, + "step": 3627 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017148703149950784, + "loss": 1.8525, + "step": 3628 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017123931571546826, + "loss": 1.8545, + "step": 3629 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001709917420024635, + "loss": 1.9131, + "step": 3630 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017074431046748073, + "loss": 1.9521, + "step": 3631 + }, + { + "epoch": 0.74, + "learning_rate": 0.000170497021217445, + "loss": 1.9072, + "step": 3632 + }, + { + "epoch": 0.74, + "learning_rate": 0.00017024987435922053, + "loss": 1.8945, + "step": 3633 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001700028699996094, + "loss": 1.8887, + "step": 3634 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016975600824535226, + "loss": 1.9229, + "step": 3635 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016950928920312857, + "loss": 1.8027, + "step": 3636 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001692627129795555, + "loss": 1.8887, + "step": 3637 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016901627968118888, + "loss": 1.8662, + "step": 3638 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016876998941452292, + "loss": 1.9814, + "step": 3639 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016852384228598967, + "loss": 1.9238, + "step": 3640 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016827783840195915, + "loss": 1.8379, + "step": 3641 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016803197786874013, + "loss": 1.9619, + "step": 3642 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016778626079257903, + "loss": 1.8809, + "step": 3643 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016754068727966003, + "loss": 1.8154, + "step": 3644 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016729525743610575, + "loss": 1.9131, + "step": 3645 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016704997136797673, + "loss": 1.9258, + "step": 3646 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016680482918127087, + "loss": 1.8535, + "step": 3647 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001665598309819245, + "loss": 1.958, + "step": 3648 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016631497687581126, + "loss": 1.8252, + "step": 3649 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016607026696874262, + "loss": 1.8262, + "step": 3650 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016582570136646784, + "loss": 1.8926, + "step": 3651 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016558128017467406, + "loss": 1.748, + "step": 3652 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016533700349898552, + "loss": 1.8984, + "step": 3653 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001650928714449641, + "loss": 1.874, + "step": 3654 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016484888411810966, + "loss": 1.8672, + "step": 3655 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016460504162385902, + "loss": 2.0068, + "step": 3656 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016436134406758635, + "loss": 1.9111, + "step": 3657 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001641177915546036, + "loss": 1.9453, + "step": 3658 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016387438419016, + "loss": 1.9736, + "step": 3659 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016363112207944152, + "loss": 1.9795, + "step": 3660 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001633880053275722, + "loss": 1.998, + "step": 3661 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016314503403961256, + "loss": 1.8604, + "step": 3662 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001629022083205603, + "loss": 1.916, + "step": 3663 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001626595282753508, + "loss": 1.8154, + "step": 3664 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016241699400885575, + "loss": 1.959, + "step": 3665 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016217460562588455, + "loss": 1.9307, + "step": 3666 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001619323632311828, + "loss": 1.8711, + "step": 3667 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016169026692943388, + "loss": 1.7812, + "step": 3668 + }, + { + "epoch": 0.74, + "learning_rate": 0.00016144831682525734, + "loss": 1.9678, + "step": 3669 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001612065130232096, + "loss": 1.9834, + "step": 3670 + }, + { + "epoch": 0.75, + "learning_rate": 0.00016096485562778446, + "loss": 1.9248, + "step": 3671 + }, + { + "epoch": 0.75, + "learning_rate": 0.00016072334474341154, + "loss": 1.9336, + "step": 3672 + }, + { + "epoch": 0.75, + "learning_rate": 0.000160481980474458, + "loss": 1.9385, + "step": 3673 + }, + { + "epoch": 0.75, + "learning_rate": 0.00016024076292522733, + "loss": 1.96, + "step": 3674 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001599996921999594, + "loss": 1.8779, + "step": 3675 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001597587684028307, + "loss": 1.8896, + "step": 3676 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001595179916379546, + "loss": 1.8584, + "step": 3677 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001592773620093804, + "loss": 1.8408, + "step": 3678 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015903687962109437, + "loss": 1.8525, + "step": 3679 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015879654457701846, + "loss": 2.0322, + "step": 3680 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015855635698101177, + "loss": 1.8496, + "step": 3681 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015831631693686904, + "loss": 1.8828, + "step": 3682 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015807642454832133, + "loss": 1.792, + "step": 3683 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015783667991903634, + "loss": 1.8789, + "step": 3684 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001575970831526172, + "loss": 1.9746, + "step": 3685 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015735763435260382, + "loss": 1.9365, + "step": 3686 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015711833362247202, + "loss": 1.7979, + "step": 3687 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015687918106563326, + "loss": 1.9541, + "step": 3688 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015664017678543519, + "loss": 1.79, + "step": 3689 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015640132088516162, + "loss": 1.9336, + "step": 3690 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001561626134680319, + "loss": 1.9307, + "step": 3691 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015592405463720117, + "loss": 1.8896, + "step": 3692 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015568564449576066, + "loss": 1.9336, + "step": 3693 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015544738314673745, + "loss": 1.9814, + "step": 3694 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001552092706930937, + "loss": 1.9229, + "step": 3695 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015497130723772797, + "loss": 1.7764, + "step": 3696 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001547334928834739, + "loss": 1.875, + "step": 3697 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015449582773310067, + "loss": 1.8115, + "step": 3698 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015425831188931356, + "loss": 1.7969, + "step": 3699 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015402094545475254, + "loss": 1.9326, + "step": 3700 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015378372853199385, + "loss": 1.918, + "step": 3701 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001535466612235484, + "loss": 1.9355, + "step": 3702 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001533097436318629, + "loss": 1.8447, + "step": 3703 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015307297585931916, + "loss": 1.8379, + "step": 3704 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015283635800823414, + "loss": 1.9443, + "step": 3705 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001525998901808604, + "loss": 1.8711, + "step": 3706 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001523635724793852, + "loss": 1.9375, + "step": 3707 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015212740500593125, + "loss": 1.9932, + "step": 3708 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015189138786255642, + "loss": 1.8789, + "step": 3709 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015165552115125325, + "loss": 1.9785, + "step": 3710 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015141980497394936, + "loss": 1.8301, + "step": 3711 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001511842394325077, + "loss": 1.9268, + "step": 3712 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015094882462872555, + "loss": 1.9434, + "step": 3713 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001507135606643355, + "loss": 2.0225, + "step": 3714 + }, + { + "epoch": 0.75, + "learning_rate": 0.000150478447641005, + "loss": 1.9561, + "step": 3715 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001502434856603358, + "loss": 1.8193, + "step": 3716 + }, + { + "epoch": 0.75, + "learning_rate": 0.00015000867482386476, + "loss": 1.8555, + "step": 3717 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001497740152330631, + "loss": 1.8408, + "step": 3718 + }, + { + "epoch": 0.75, + "learning_rate": 0.00014953950698933722, + "loss": 1.8486, + "step": 3719 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001493051501940275, + "loss": 1.8555, + "step": 3720 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014907094494840927, + "loss": 1.8984, + "step": 3721 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014883689135369255, + "loss": 1.8291, + "step": 3722 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014860298951102126, + "loss": 1.8311, + "step": 3723 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014836923952147386, + "loss": 1.9053, + "step": 3724 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014813564148606374, + "loss": 1.8896, + "step": 3725 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014790219550573798, + "loss": 1.7988, + "step": 3726 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014766890168137813, + "loss": 1.8301, + "step": 3727 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014743576011380029, + "loss": 1.9062, + "step": 3728 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001472027709037545, + "loss": 1.9463, + "step": 3729 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014696993415192484, + "loss": 1.8555, + "step": 3730 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014673724995892983, + "loss": 1.8027, + "step": 3731 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014650471842532194, + "loss": 1.7715, + "step": 3732 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014627233965158743, + "loss": 1.9746, + "step": 3733 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014604011373814667, + "loss": 1.9297, + "step": 3734 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014580804078535424, + "loss": 1.9932, + "step": 3735 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014557612089349854, + "loss": 1.8701, + "step": 3736 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014534435416280135, + "loss": 1.8857, + "step": 3737 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014511274069341896, + "loss": 1.958, + "step": 3738 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014488128058544097, + "loss": 1.8643, + "step": 3739 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014464997393889067, + "loss": 1.9141, + "step": 3740 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014441882085372532, + "loss": 1.8926, + "step": 3741 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014418782142983587, + "loss": 1.8945, + "step": 3742 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014395697576704636, + "loss": 1.9424, + "step": 3743 + }, + { + "epoch": 0.76, + "learning_rate": 0.000143726283965115, + "loss": 1.9648, + "step": 3744 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001434957461237331, + "loss": 1.8691, + "step": 3745 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001432653623425254, + "loss": 1.8535, + "step": 3746 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014303513272105056, + "loss": 1.8428, + "step": 3747 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001428050573587999, + "loss": 1.8857, + "step": 3748 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014257513635519865, + "loss": 1.793, + "step": 3749 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014234536980960532, + "loss": 1.874, + "step": 3750 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001421157578213113, + "loss": 1.8867, + "step": 3751 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014188630048954148, + "loss": 1.918, + "step": 3752 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001416569979134536, + "loss": 1.8623, + "step": 3753 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014142785019213906, + "loss": 1.8721, + "step": 3754 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014119885742462169, + "loss": 1.9316, + "step": 3755 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014097001970985896, + "loss": 1.9297, + "step": 3756 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014074133714674113, + "loss": 1.9814, + "step": 3757 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014051280983409125, + "loss": 1.792, + "step": 3758 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001402844378706652, + "loss": 1.8926, + "step": 3759 + }, + { + "epoch": 0.76, + "learning_rate": 0.00014005622135515223, + "loss": 1.9629, + "step": 3760 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013982816038617398, + "loss": 1.918, + "step": 3761 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001396002550622848, + "loss": 1.8369, + "step": 3762 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001393725054819721, + "loss": 1.8379, + "step": 3763 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013914491174365606, + "loss": 1.8115, + "step": 3764 + }, + { + "epoch": 0.76, + "learning_rate": 0.000138917473945689, + "loss": 1.9082, + "step": 3765 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013869019218635642, + "loss": 1.9219, + "step": 3766 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013846306656387604, + "loss": 1.9766, + "step": 3767 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001382360971763982, + "loss": 1.9062, + "step": 3768 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013800928412200546, + "loss": 1.8721, + "step": 3769 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013778262749871334, + "loss": 2.0176, + "step": 3770 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013755612740446965, + "loss": 1.9453, + "step": 3771 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013732978393715407, + "loss": 1.918, + "step": 3772 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013710359719457926, + "loss": 1.7227, + "step": 3773 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013687756727448968, + "loss": 1.9131, + "step": 3774 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013665169427456203, + "loss": 1.8555, + "step": 3775 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013642597829240544, + "loss": 1.8633, + "step": 3776 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013620041942556126, + "loss": 1.9824, + "step": 3777 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013597501777150252, + "loss": 1.9609, + "step": 3778 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001357497734276348, + "loss": 1.8516, + "step": 3779 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001355246864912953, + "loss": 1.8857, + "step": 3780 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013529975705975329, + "loss": 1.876, + "step": 3781 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013507498523021022, + "loss": 1.8291, + "step": 3782 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013485037109979908, + "loss": 1.8945, + "step": 3783 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013462591476558512, + "loss": 1.9043, + "step": 3784 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013440161632456482, + "loss": 1.8809, + "step": 3785 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013417747587366719, + "loss": 1.8682, + "step": 3786 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001339534935097524, + "loss": 1.7871, + "step": 3787 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013372966932961218, + "loss": 1.8398, + "step": 3788 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013350600342997061, + "loss": 1.9209, + "step": 3789 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013328249590748253, + "loss": 1.8799, + "step": 3790 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013305914685873499, + "loss": 1.8975, + "step": 3791 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013283595638024636, + "loss": 1.9404, + "step": 3792 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013261292456846646, + "loss": 1.915, + "step": 3793 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013239005151977618, + "loss": 1.7666, + "step": 3794 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013216733733048859, + "loss": 1.9648, + "step": 3795 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013194478209684746, + "loss": 1.876, + "step": 3796 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013172238591502793, + "loss": 1.9141, + "step": 3797 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001315001488811367, + "loss": 1.9404, + "step": 3798 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013127807109121164, + "loss": 1.7627, + "step": 3799 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013105615264122155, + "loss": 1.8721, + "step": 3800 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013083439362706673, + "loss": 1.9004, + "step": 3801 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013061279414457823, + "loss": 1.916, + "step": 3802 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001303913542895182, + "loss": 1.751, + "step": 3803 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001301700741575801, + "loss": 1.8682, + "step": 3804 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012994895384438825, + "loss": 1.9785, + "step": 3805 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001297279934454978, + "loss": 1.9795, + "step": 3806 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012950719305639463, + "loss": 1.877, + "step": 3807 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012928655277249596, + "loss": 1.8438, + "step": 3808 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012906607268914949, + "loss": 1.8506, + "step": 3809 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012884575290163353, + "loss": 1.9336, + "step": 3810 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012862559350515745, + "loss": 1.8916, + "step": 3811 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012840559459486144, + "loss": 1.7725, + "step": 3812 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012818575626581574, + "loss": 1.8467, + "step": 3813 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012796607861302183, + "loss": 1.8594, + "step": 3814 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012774656173141142, + "loss": 1.7812, + "step": 3815 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012752720571584648, + "loss": 1.8877, + "step": 3816 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012730801066112026, + "loss": 1.9336, + "step": 3817 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012708897666195552, + "loss": 1.8291, + "step": 3818 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012687010381300624, + "loss": 1.8662, + "step": 3819 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012665139220885614, + "loss": 1.8887, + "step": 3820 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001264328419440197, + "loss": 1.8125, + "step": 3821 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001262144531129415, + "loss": 1.9746, + "step": 3822 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012599622580999605, + "loss": 1.8721, + "step": 3823 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012577816012948872, + "loss": 1.918, + "step": 3824 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012556025616565443, + "loss": 1.8066, + "step": 3825 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001253425140126585, + "loss": 1.8379, + "step": 3826 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001251249337645965, + "loss": 1.9141, + "step": 3827 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012490751551549368, + "loss": 1.9854, + "step": 3828 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012469025935930522, + "loss": 1.9209, + "step": 3829 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012447316538991682, + "loss": 1.7939, + "step": 3830 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012425623370114353, + "loss": 1.9141, + "step": 3831 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001240394643867303, + "loss": 1.8203, + "step": 3832 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012382285754035228, + "loss": 1.8232, + "step": 3833 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012360641325561434, + "loss": 1.8711, + "step": 3834 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001233901316260508, + "loss": 1.9238, + "step": 3835 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001231740127451258, + "loss": 1.8574, + "step": 3836 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001229580567062334, + "loss": 1.8486, + "step": 3837 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012274226360269687, + "loss": 1.9727, + "step": 3838 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001225266335277694, + "loss": 1.873, + "step": 3839 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001223111665746338, + "loss": 1.8428, + "step": 3840 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012209586283640206, + "loss": 1.8643, + "step": 3841 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001218807224061157, + "loss": 1.8848, + "step": 3842 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012166574537674602, + "loss": 1.8662, + "step": 3843 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012145093184119326, + "loss": 1.8604, + "step": 3844 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012123628189228713, + "loss": 1.8711, + "step": 3845 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012102179562278682, + "loss": 1.8125, + "step": 3846 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012080747312538082, + "loss": 1.8682, + "step": 3847 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012059331449268645, + "loss": 1.8457, + "step": 3848 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012037931981725076, + "loss": 1.8545, + "step": 3849 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012016548919154946, + "loss": 1.877, + "step": 3850 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011995182270798766, + "loss": 1.792, + "step": 3851 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011973832045889927, + "loss": 1.9238, + "step": 3852 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011952498253654753, + "loss": 1.9111, + "step": 3853 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011931180903312467, + "loss": 1.9414, + "step": 3854 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011909880004075152, + "loss": 1.9238, + "step": 3855 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011888595565147836, + "loss": 1.873, + "step": 3856 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011867327595728372, + "loss": 1.8037, + "step": 3857 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011846076105007526, + "loss": 1.7227, + "step": 3858 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011824841102168965, + "loss": 1.9561, + "step": 3859 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011803622596389185, + "loss": 1.8506, + "step": 3860 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011782420596837585, + "loss": 1.8545, + "step": 3861 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011761235112676433, + "loss": 1.918, + "step": 3862 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011740066153060847, + "loss": 1.8252, + "step": 3863 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001171891372713878, + "loss": 1.8936, + "step": 3864 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011697777844051105, + "loss": 1.8086, + "step": 3865 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011676658512931477, + "loss": 1.7822, + "step": 3866 + }, + { + "epoch": 0.78, + "learning_rate": 0.00011655555742906437, + "loss": 1.8945, + "step": 3867 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011634469543095383, + "loss": 1.8047, + "step": 3868 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011613399922610512, + "loss": 1.8691, + "step": 3869 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011592346890556876, + "loss": 1.7744, + "step": 3870 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011571310456032341, + "loss": 2.0449, + "step": 3871 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011550290628127653, + "loss": 1.9395, + "step": 3872 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011529287415926309, + "loss": 1.8545, + "step": 3873 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011508300828504681, + "loss": 1.8486, + "step": 3874 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011487330874931951, + "loss": 1.9023, + "step": 3875 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011466377564270086, + "loss": 2.042, + "step": 3876 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011445440905573857, + "loss": 1.8652, + "step": 3877 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011424520907890889, + "loss": 1.7891, + "step": 3878 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001140361758026156, + "loss": 1.8076, + "step": 3879 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011382730931719042, + "loss": 1.8418, + "step": 3880 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001136186097128934, + "loss": 1.8379, + "step": 3881 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011341007707991225, + "loss": 1.8301, + "step": 3882 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011320171150836228, + "loss": 1.874, + "step": 3883 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011299351308828709, + "loss": 1.7539, + "step": 3884 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011278548190965777, + "loss": 1.8848, + "step": 3885 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011257761806237299, + "loss": 1.9297, + "step": 3886 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011236992163625932, + "loss": 1.8662, + "step": 3887 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011216239272107093, + "loss": 1.8281, + "step": 3888 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011195503140648983, + "loss": 1.8604, + "step": 3889 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001117478377821251, + "loss": 1.96, + "step": 3890 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011154081193751387, + "loss": 1.8467, + "step": 3891 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011133395396212048, + "loss": 1.7969, + "step": 3892 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011112726394533662, + "loss": 1.9297, + "step": 3893 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011092074197648178, + "loss": 1.9277, + "step": 3894 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011071438814480239, + "loss": 1.9551, + "step": 3895 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011050820253947259, + "loss": 1.8574, + "step": 3896 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011030218524959373, + "loss": 1.917, + "step": 3897 + }, + { + "epoch": 0.79, + "learning_rate": 0.00011009633636419431, + "loss": 1.9639, + "step": 3898 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010989065597222996, + "loss": 1.8936, + "step": 3899 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001096851441625839, + "loss": 1.8594, + "step": 3900 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010947980102406597, + "loss": 1.9004, + "step": 3901 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010927462664541377, + "loss": 1.9502, + "step": 3902 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010906962111529118, + "loss": 1.9209, + "step": 3903 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010886478452228987, + "loss": 1.8438, + "step": 3904 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010866011695492812, + "loss": 1.7139, + "step": 3905 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010845561850165093, + "loss": 1.9111, + "step": 3906 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010825128925083089, + "loss": 1.8584, + "step": 3907 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010804712929076676, + "loss": 1.8184, + "step": 3908 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010784313870968465, + "loss": 1.833, + "step": 3909 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010763931759573737, + "loss": 1.9111, + "step": 3910 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010743566603700433, + "loss": 1.9727, + "step": 3911 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001072321841214916, + "loss": 1.8154, + "step": 3912 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001070288719371324, + "loss": 1.8721, + "step": 3913 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010682572957178616, + "loss": 1.8438, + "step": 3914 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010662275711323888, + "loss": 1.876, + "step": 3915 + }, + { + "epoch": 0.79, + "learning_rate": 0.00010641995464920345, + "loss": 1.8232, + "step": 3916 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010621732226731939, + "loss": 1.8984, + "step": 3917 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010601486005515216, + "loss": 1.7236, + "step": 3918 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010581256810019435, + "loss": 1.7373, + "step": 3919 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010561044648986445, + "loss": 1.9219, + "step": 3920 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010540849531150754, + "loss": 1.9453, + "step": 3921 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010520671465239496, + "loss": 1.876, + "step": 3922 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010500510459972451, + "loss": 1.9541, + "step": 3923 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010480366524062041, + "loss": 1.8506, + "step": 3924 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010460239666213261, + "loss": 1.8711, + "step": 3925 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001044012989512378, + "loss": 1.8877, + "step": 3926 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010420037219483841, + "loss": 1.9512, + "step": 3927 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010399961647976313, + "loss": 1.7539, + "step": 3928 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010379903189276684, + "loss": 1.8545, + "step": 3929 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001035986185205306, + "loss": 1.8301, + "step": 3930 + }, + { + "epoch": 0.8, + "learning_rate": 0.000103398376449661, + "loss": 1.8291, + "step": 3931 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001031983057666911, + "loss": 1.8955, + "step": 3932 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010299840655807957, + "loss": 1.7949, + "step": 3933 + }, + { + "epoch": 0.8, + "learning_rate": 0.000102798678910211, + "loss": 1.9316, + "step": 3934 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010259912290939616, + "loss": 1.918, + "step": 3935 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010239973864187118, + "loss": 1.8428, + "step": 3936 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010220052619379844, + "loss": 1.8574, + "step": 3937 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010200148565126566, + "loss": 1.8076, + "step": 3938 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010180261710028665, + "loss": 1.9551, + "step": 3939 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010160392062680058, + "loss": 1.8838, + "step": 3940 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010140539631667228, + "loss": 1.7568, + "step": 3941 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010120704425569249, + "loss": 1.7656, + "step": 3942 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010100886452957714, + "loss": 1.9424, + "step": 3943 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010081085722396788, + "loss": 1.9512, + "step": 3944 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010061302242443204, + "loss": 1.7568, + "step": 3945 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010041536021646203, + "loss": 1.835, + "step": 3946 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010021787068547567, + "loss": 1.8711, + "step": 3947 + }, + { + "epoch": 0.8, + "learning_rate": 0.00010002055391681658, + "loss": 1.9043, + "step": 3948 + }, + { + "epoch": 0.8, + "learning_rate": 9.982340999575334e-05, + "loss": 1.9629, + "step": 3949 + }, + { + "epoch": 0.8, + "learning_rate": 9.962643900747992e-05, + "loss": 1.9023, + "step": 3950 + }, + { + "epoch": 0.8, + "learning_rate": 9.94296410371156e-05, + "loss": 1.9395, + "step": 3951 + }, + { + "epoch": 0.8, + "learning_rate": 9.923301616970509e-05, + "loss": 1.8887, + "step": 3952 + }, + { + "epoch": 0.8, + "learning_rate": 9.903656449021797e-05, + "loss": 1.9482, + "step": 3953 + }, + { + "epoch": 0.8, + "learning_rate": 9.884028608354895e-05, + "loss": 1.8242, + "step": 3954 + }, + { + "epoch": 0.8, + "learning_rate": 9.864418103451828e-05, + "loss": 1.8271, + "step": 3955 + }, + { + "epoch": 0.8, + "learning_rate": 9.844824942787072e-05, + "loss": 1.8711, + "step": 3956 + }, + { + "epoch": 0.8, + "learning_rate": 9.825249134827663e-05, + "loss": 1.8936, + "step": 3957 + }, + { + "epoch": 0.8, + "learning_rate": 9.805690688033114e-05, + "loss": 1.8154, + "step": 3958 + }, + { + "epoch": 0.8, + "learning_rate": 9.786149610855439e-05, + "loss": 1.7695, + "step": 3959 + }, + { + "epoch": 0.8, + "learning_rate": 9.766625911739113e-05, + "loss": 1.8926, + "step": 3960 + }, + { + "epoch": 0.8, + "learning_rate": 9.747119599121163e-05, + "loss": 1.8779, + "step": 3961 + }, + { + "epoch": 0.8, + "learning_rate": 9.72763068143106e-05, + "loss": 1.8838, + "step": 3962 + }, + { + "epoch": 0.8, + "learning_rate": 9.708159167090753e-05, + "loss": 1.9326, + "step": 3963 + }, + { + "epoch": 0.8, + "learning_rate": 9.688705064514702e-05, + "loss": 1.8418, + "step": 3964 + }, + { + "epoch": 0.8, + "learning_rate": 9.66926838210983e-05, + "loss": 1.9795, + "step": 3965 + }, + { + "epoch": 0.8, + "learning_rate": 9.649849128275512e-05, + "loss": 1.9844, + "step": 3966 + }, + { + "epoch": 0.81, + "learning_rate": 9.630447311403623e-05, + "loss": 1.8594, + "step": 3967 + }, + { + "epoch": 0.81, + "learning_rate": 9.61106293987849e-05, + "loss": 1.834, + "step": 3968 + }, + { + "epoch": 0.81, + "learning_rate": 9.591696022076868e-05, + "loss": 1.8066, + "step": 3969 + }, + { + "epoch": 0.81, + "learning_rate": 9.57234656636804e-05, + "loss": 1.8965, + "step": 3970 + }, + { + "epoch": 0.81, + "learning_rate": 9.553014581113667e-05, + "loss": 1.8838, + "step": 3971 + }, + { + "epoch": 0.81, + "learning_rate": 9.533700074667927e-05, + "loss": 1.9121, + "step": 3972 + }, + { + "epoch": 0.81, + "learning_rate": 9.514403055377385e-05, + "loss": 1.8037, + "step": 3973 + }, + { + "epoch": 0.81, + "learning_rate": 9.495123531581113e-05, + "loss": 1.9482, + "step": 3974 + }, + { + "epoch": 0.81, + "learning_rate": 9.475861511610562e-05, + "loss": 1.8545, + "step": 3975 + }, + { + "epoch": 0.81, + "learning_rate": 9.456617003789642e-05, + "loss": 1.8105, + "step": 3976 + }, + { + "epoch": 0.81, + "learning_rate": 9.43739001643471e-05, + "loss": 1.9395, + "step": 3977 + }, + { + "epoch": 0.81, + "learning_rate": 9.418180557854528e-05, + "loss": 1.8555, + "step": 3978 + }, + { + "epoch": 0.81, + "learning_rate": 9.398988636350303e-05, + "loss": 1.8672, + "step": 3979 + }, + { + "epoch": 0.81, + "learning_rate": 9.379814260215663e-05, + "loss": 1.9043, + "step": 3980 + }, + { + "epoch": 0.81, + "learning_rate": 9.360657437736636e-05, + "loss": 1.9199, + "step": 3981 + }, + { + "epoch": 0.81, + "learning_rate": 9.34151817719166e-05, + "loss": 1.8154, + "step": 3982 + }, + { + "epoch": 0.81, + "learning_rate": 9.322396486851626e-05, + "loss": 1.9678, + "step": 3983 + }, + { + "epoch": 0.81, + "learning_rate": 9.303292374979794e-05, + "loss": 1.9141, + "step": 3984 + }, + { + "epoch": 0.81, + "learning_rate": 9.284205849831817e-05, + "loss": 1.8789, + "step": 3985 + }, + { + "epoch": 0.81, + "learning_rate": 9.265136919655787e-05, + "loss": 1.8936, + "step": 3986 + }, + { + "epoch": 0.81, + "learning_rate": 9.246085592692183e-05, + "loss": 1.9062, + "step": 3987 + }, + { + "epoch": 0.81, + "learning_rate": 9.227051877173859e-05, + "loss": 1.8818, + "step": 3988 + }, + { + "epoch": 0.81, + "learning_rate": 9.208035781326057e-05, + "loss": 1.9062, + "step": 3989 + }, + { + "epoch": 0.81, + "learning_rate": 9.189037313366439e-05, + "loss": 1.8438, + "step": 3990 + }, + { + "epoch": 0.81, + "learning_rate": 9.170056481504996e-05, + "loss": 1.9062, + "step": 3991 + }, + { + "epoch": 0.81, + "learning_rate": 9.151093293944141e-05, + "loss": 1.7617, + "step": 3992 + }, + { + "epoch": 0.81, + "learning_rate": 9.132147758878668e-05, + "loss": 1.792, + "step": 3993 + }, + { + "epoch": 0.81, + "learning_rate": 9.113219884495699e-05, + "loss": 1.8252, + "step": 3994 + }, + { + "epoch": 0.81, + "learning_rate": 9.09430967897475e-05, + "loss": 1.9346, + "step": 3995 + }, + { + "epoch": 0.81, + "learning_rate": 9.075417150487713e-05, + "loss": 1.8408, + "step": 3996 + }, + { + "epoch": 0.81, + "learning_rate": 9.056542307198823e-05, + "loss": 1.874, + "step": 3997 + }, + { + "epoch": 0.81, + "learning_rate": 9.037685157264658e-05, + "loss": 1.9814, + "step": 3998 + }, + { + "epoch": 0.81, + "learning_rate": 9.018845708834189e-05, + "loss": 1.8232, + "step": 3999 + }, + { + "epoch": 0.81, + "learning_rate": 9.000023970048726e-05, + "loss": 1.8701, + "step": 4000 + }, + { + "epoch": 0.81, + "learning_rate": 8.981219949041891e-05, + "loss": 1.7861, + "step": 4001 + }, + { + "epoch": 0.81, + "learning_rate": 8.962433653939705e-05, + "loss": 1.8701, + "step": 4002 + }, + { + "epoch": 0.81, + "learning_rate": 8.943665092860487e-05, + "loss": 1.8574, + "step": 4003 + }, + { + "epoch": 0.81, + "learning_rate": 8.924914273914903e-05, + "loss": 1.8633, + "step": 4004 + }, + { + "epoch": 0.81, + "learning_rate": 8.906181205205943e-05, + "loss": 1.9053, + "step": 4005 + }, + { + "epoch": 0.81, + "learning_rate": 8.88746589482895e-05, + "loss": 1.9092, + "step": 4006 + }, + { + "epoch": 0.81, + "learning_rate": 8.868768350871592e-05, + "loss": 1.7559, + "step": 4007 + }, + { + "epoch": 0.81, + "learning_rate": 8.850088581413829e-05, + "loss": 1.959, + "step": 4008 + }, + { + "epoch": 0.81, + "learning_rate": 8.831426594527975e-05, + "loss": 1.8926, + "step": 4009 + }, + { + "epoch": 0.81, + "learning_rate": 8.81278239827864e-05, + "loss": 1.8594, + "step": 4010 + }, + { + "epoch": 0.81, + "learning_rate": 8.794156000722731e-05, + "loss": 1.9141, + "step": 4011 + }, + { + "epoch": 0.81, + "learning_rate": 8.775547409909501e-05, + "loss": 1.8037, + "step": 4012 + }, + { + "epoch": 0.81, + "learning_rate": 8.756956633880475e-05, + "loss": 1.9043, + "step": 4013 + }, + { + "epoch": 0.81, + "learning_rate": 8.738383680669504e-05, + "loss": 1.9648, + "step": 4014 + }, + { + "epoch": 0.81, + "learning_rate": 8.719828558302733e-05, + "loss": 1.8701, + "step": 4015 + }, + { + "epoch": 0.82, + "learning_rate": 8.701291274798584e-05, + "loss": 1.9277, + "step": 4016 + }, + { + "epoch": 0.82, + "learning_rate": 8.682771838167774e-05, + "loss": 1.9893, + "step": 4017 + }, + { + "epoch": 0.82, + "learning_rate": 8.664270256413331e-05, + "loss": 1.8428, + "step": 4018 + }, + { + "epoch": 0.82, + "learning_rate": 8.645786537530526e-05, + "loss": 1.8477, + "step": 4019 + }, + { + "epoch": 0.82, + "learning_rate": 8.627320689506956e-05, + "loss": 1.8633, + "step": 4020 + }, + { + "epoch": 0.82, + "learning_rate": 8.608872720322481e-05, + "loss": 1.8008, + "step": 4021 + }, + { + "epoch": 0.82, + "learning_rate": 8.59044263794922e-05, + "loss": 1.9248, + "step": 4022 + }, + { + "epoch": 0.82, + "learning_rate": 8.572030450351565e-05, + "loss": 1.8096, + "step": 4023 + }, + { + "epoch": 0.82, + "learning_rate": 8.553636165486173e-05, + "loss": 1.8828, + "step": 4024 + }, + { + "epoch": 0.82, + "learning_rate": 8.535259791301996e-05, + "loss": 1.9131, + "step": 4025 + }, + { + "epoch": 0.82, + "learning_rate": 8.516901335740196e-05, + "loss": 1.915, + "step": 4026 + }, + { + "epoch": 0.82, + "learning_rate": 8.498560806734229e-05, + "loss": 1.8135, + "step": 4027 + }, + { + "epoch": 0.82, + "learning_rate": 8.480238212209812e-05, + "loss": 1.7637, + "step": 4028 + }, + { + "epoch": 0.82, + "learning_rate": 8.461933560084878e-05, + "loss": 1.7793, + "step": 4029 + }, + { + "epoch": 0.82, + "learning_rate": 8.443646858269604e-05, + "loss": 1.9092, + "step": 4030 + }, + { + "epoch": 0.82, + "learning_rate": 8.425378114666455e-05, + "loss": 1.7979, + "step": 4031 + }, + { + "epoch": 0.82, + "learning_rate": 8.407127337170096e-05, + "loss": 1.8691, + "step": 4032 + }, + { + "epoch": 0.82, + "learning_rate": 8.388894533667429e-05, + "loss": 1.9287, + "step": 4033 + }, + { + "epoch": 0.82, + "learning_rate": 8.37067971203761e-05, + "loss": 1.9863, + "step": 4034 + }, + { + "epoch": 0.82, + "learning_rate": 8.352482880152023e-05, + "loss": 1.9609, + "step": 4035 + }, + { + "epoch": 0.82, + "learning_rate": 8.334304045874247e-05, + "loss": 1.9248, + "step": 4036 + }, + { + "epoch": 0.82, + "learning_rate": 8.316143217060129e-05, + "loss": 1.9561, + "step": 4037 + }, + { + "epoch": 0.82, + "learning_rate": 8.298000401557704e-05, + "loss": 1.748, + "step": 4038 + }, + { + "epoch": 0.82, + "learning_rate": 8.279875607207226e-05, + "loss": 1.8818, + "step": 4039 + }, + { + "epoch": 0.82, + "learning_rate": 8.26176884184115e-05, + "loss": 1.8584, + "step": 4040 + }, + { + "epoch": 0.82, + "learning_rate": 8.243680113284169e-05, + "loss": 1.8174, + "step": 4041 + }, + { + "epoch": 0.82, + "learning_rate": 8.225609429353187e-05, + "loss": 1.9326, + "step": 4042 + }, + { + "epoch": 0.82, + "learning_rate": 8.207556797857252e-05, + "loss": 1.8936, + "step": 4043 + }, + { + "epoch": 0.82, + "learning_rate": 8.189522226597689e-05, + "loss": 1.9111, + "step": 4044 + }, + { + "epoch": 0.82, + "learning_rate": 8.171505723367961e-05, + "loss": 1.8311, + "step": 4045 + }, + { + "epoch": 0.82, + "learning_rate": 8.153507295953727e-05, + "loss": 1.8555, + "step": 4046 + }, + { + "epoch": 0.82, + "learning_rate": 8.135526952132877e-05, + "loss": 1.9023, + "step": 4047 + }, + { + "epoch": 0.82, + "learning_rate": 8.117564699675434e-05, + "loss": 1.9326, + "step": 4048 + }, + { + "epoch": 0.82, + "learning_rate": 8.099620546343634e-05, + "loss": 1.9004, + "step": 4049 + }, + { + "epoch": 0.82, + "learning_rate": 8.081694499891901e-05, + "loss": 1.8887, + "step": 4050 + }, + { + "epoch": 0.82, + "learning_rate": 8.063786568066805e-05, + "loss": 1.8945, + "step": 4051 + }, + { + "epoch": 0.82, + "learning_rate": 8.045896758607091e-05, + "loss": 1.8896, + "step": 4052 + }, + { + "epoch": 0.82, + "learning_rate": 8.028025079243705e-05, + "loss": 1.874, + "step": 4053 + }, + { + "epoch": 0.82, + "learning_rate": 8.010171537699713e-05, + "loss": 1.8564, + "step": 4054 + }, + { + "epoch": 0.82, + "learning_rate": 7.992336141690393e-05, + "loss": 1.874, + "step": 4055 + }, + { + "epoch": 0.82, + "learning_rate": 7.974518898923117e-05, + "loss": 1.9141, + "step": 4056 + }, + { + "epoch": 0.82, + "learning_rate": 7.95671981709749e-05, + "loss": 1.9346, + "step": 4057 + }, + { + "epoch": 0.82, + "learning_rate": 7.938938903905207e-05, + "loss": 1.8848, + "step": 4058 + }, + { + "epoch": 0.82, + "learning_rate": 7.921176167030125e-05, + "loss": 1.874, + "step": 4059 + }, + { + "epoch": 0.82, + "learning_rate": 7.903431614148276e-05, + "loss": 1.8008, + "step": 4060 + }, + { + "epoch": 0.82, + "learning_rate": 7.885705252927788e-05, + "loss": 1.8457, + "step": 4061 + }, + { + "epoch": 0.82, + "learning_rate": 7.867997091028966e-05, + "loss": 1.8877, + "step": 4062 + }, + { + "epoch": 0.82, + "learning_rate": 7.850307136104246e-05, + "loss": 1.8193, + "step": 4063 + }, + { + "epoch": 0.82, + "learning_rate": 7.832635395798176e-05, + "loss": 1.8809, + "step": 4064 + }, + { + "epoch": 0.83, + "learning_rate": 7.814981877747434e-05, + "loss": 1.835, + "step": 4065 + }, + { + "epoch": 0.83, + "learning_rate": 7.79734658958085e-05, + "loss": 1.8135, + "step": 4066 + }, + { + "epoch": 0.83, + "learning_rate": 7.779729538919351e-05, + "loss": 1.8438, + "step": 4067 + }, + { + "epoch": 0.83, + "learning_rate": 7.762130733375971e-05, + "loss": 1.9443, + "step": 4068 + }, + { + "epoch": 0.83, + "learning_rate": 7.744550180555903e-05, + "loss": 1.8896, + "step": 4069 + }, + { + "epoch": 0.83, + "learning_rate": 7.726987888056431e-05, + "loss": 1.8906, + "step": 4070 + }, + { + "epoch": 0.83, + "learning_rate": 7.709443863466925e-05, + "loss": 1.8291, + "step": 4071 + }, + { + "epoch": 0.83, + "learning_rate": 7.691918114368906e-05, + "loss": 1.9512, + "step": 4072 + }, + { + "epoch": 0.83, + "learning_rate": 7.674410648335956e-05, + "loss": 1.8379, + "step": 4073 + }, + { + "epoch": 0.83, + "learning_rate": 7.65692147293377e-05, + "loss": 1.8818, + "step": 4074 + }, + { + "epoch": 0.83, + "learning_rate": 7.639450595720137e-05, + "loss": 1.9053, + "step": 4075 + }, + { + "epoch": 0.83, + "learning_rate": 7.621998024244948e-05, + "loss": 1.8799, + "step": 4076 + }, + { + "epoch": 0.83, + "learning_rate": 7.604563766050199e-05, + "loss": 1.9473, + "step": 4077 + }, + { + "epoch": 0.83, + "learning_rate": 7.587147828669915e-05, + "loss": 1.9619, + "step": 4078 + }, + { + "epoch": 0.83, + "learning_rate": 7.569750219630272e-05, + "loss": 1.877, + "step": 4079 + }, + { + "epoch": 0.83, + "learning_rate": 7.552370946449483e-05, + "loss": 1.7861, + "step": 4080 + }, + { + "epoch": 0.83, + "learning_rate": 7.535010016637827e-05, + "loss": 1.8369, + "step": 4081 + }, + { + "epoch": 0.83, + "learning_rate": 7.517667437697695e-05, + "loss": 1.8711, + "step": 4082 + }, + { + "epoch": 0.83, + "learning_rate": 7.500343217123546e-05, + "loss": 1.915, + "step": 4083 + }, + { + "epoch": 0.83, + "learning_rate": 7.483037362401856e-05, + "loss": 1.8945, + "step": 4084 + }, + { + "epoch": 0.83, + "learning_rate": 7.46574988101123e-05, + "loss": 1.8018, + "step": 4085 + }, + { + "epoch": 0.83, + "learning_rate": 7.448480780422285e-05, + "loss": 1.8047, + "step": 4086 + }, + { + "epoch": 0.83, + "learning_rate": 7.4312300680977e-05, + "loss": 1.8682, + "step": 4087 + }, + { + "epoch": 0.83, + "learning_rate": 7.41399775149224e-05, + "loss": 1.875, + "step": 4088 + }, + { + "epoch": 0.83, + "learning_rate": 7.396783838052679e-05, + "loss": 1.9199, + "step": 4089 + }, + { + "epoch": 0.83, + "learning_rate": 7.379588335217874e-05, + "loss": 1.8008, + "step": 4090 + }, + { + "epoch": 0.83, + "learning_rate": 7.3624112504187e-05, + "loss": 1.8408, + "step": 4091 + }, + { + "epoch": 0.83, + "learning_rate": 7.345252591078088e-05, + "loss": 1.8408, + "step": 4092 + }, + { + "epoch": 0.83, + "learning_rate": 7.328112364611012e-05, + "loss": 1.834, + "step": 4093 + }, + { + "epoch": 0.83, + "learning_rate": 7.310990578424437e-05, + "loss": 1.8223, + "step": 4094 + }, + { + "epoch": 0.83, + "learning_rate": 7.293887239917429e-05, + "loss": 1.8359, + "step": 4095 + }, + { + "epoch": 0.83, + "learning_rate": 7.276802356481016e-05, + "loss": 1.8643, + "step": 4096 + }, + { + "epoch": 0.83, + "learning_rate": 7.259735935498295e-05, + "loss": 1.8447, + "step": 4097 + }, + { + "epoch": 0.83, + "learning_rate": 7.242687984344382e-05, + "loss": 1.8838, + "step": 4098 + }, + { + "epoch": 0.83, + "learning_rate": 7.225658510386385e-05, + "loss": 1.8438, + "step": 4099 + }, + { + "epoch": 0.83, + "learning_rate": 7.208647520983425e-05, + "loss": 1.8477, + "step": 4100 + }, + { + "epoch": 0.83, + "learning_rate": 7.191655023486682e-05, + "loss": 1.8389, + "step": 4101 + }, + { + "epoch": 0.83, + "learning_rate": 7.174681025239299e-05, + "loss": 1.8623, + "step": 4102 + }, + { + "epoch": 0.83, + "learning_rate": 7.157725533576426e-05, + "loss": 1.8174, + "step": 4103 + }, + { + "epoch": 0.83, + "learning_rate": 7.14078855582525e-05, + "loss": 1.9238, + "step": 4104 + }, + { + "epoch": 0.83, + "learning_rate": 7.123870099304941e-05, + "loss": 1.833, + "step": 4105 + }, + { + "epoch": 0.83, + "learning_rate": 7.106970171326649e-05, + "loss": 1.9785, + "step": 4106 + }, + { + "epoch": 0.83, + "learning_rate": 7.090088779193516e-05, + "loss": 1.7646, + "step": 4107 + }, + { + "epoch": 0.83, + "learning_rate": 7.073225930200722e-05, + "loss": 1.8799, + "step": 4108 + }, + { + "epoch": 0.83, + "learning_rate": 7.05638163163536e-05, + "loss": 1.9141, + "step": 4109 + }, + { + "epoch": 0.83, + "learning_rate": 7.039555890776578e-05, + "loss": 1.9014, + "step": 4110 + }, + { + "epoch": 0.83, + "learning_rate": 7.022748714895444e-05, + "loss": 1.8662, + "step": 4111 + }, + { + "epoch": 0.83, + "learning_rate": 7.00596011125506e-05, + "loss": 1.8525, + "step": 4112 + }, + { + "epoch": 0.83, + "learning_rate": 6.989190087110442e-05, + "loss": 1.8701, + "step": 4113 + }, + { + "epoch": 0.83, + "learning_rate": 6.97243864970864e-05, + "loss": 1.8867, + "step": 4114 + }, + { + "epoch": 0.84, + "learning_rate": 6.955705806288626e-05, + "loss": 1.8135, + "step": 4115 + }, + { + "epoch": 0.84, + "learning_rate": 6.938991564081332e-05, + "loss": 1.9307, + "step": 4116 + }, + { + "epoch": 0.84, + "learning_rate": 6.922295930309692e-05, + "loss": 1.8018, + "step": 4117 + }, + { + "epoch": 0.84, + "learning_rate": 6.905618912188582e-05, + "loss": 1.8262, + "step": 4118 + }, + { + "epoch": 0.84, + "learning_rate": 6.888960516924808e-05, + "loss": 1.8467, + "step": 4119 + }, + { + "epoch": 0.84, + "learning_rate": 6.872320751717175e-05, + "loss": 1.749, + "step": 4120 + }, + { + "epoch": 0.84, + "learning_rate": 6.855699623756395e-05, + "loss": 1.8877, + "step": 4121 + }, + { + "epoch": 0.84, + "learning_rate": 6.839097140225136e-05, + "loss": 1.8232, + "step": 4122 + }, + { + "epoch": 0.84, + "learning_rate": 6.822513308298028e-05, + "loss": 1.7822, + "step": 4123 + }, + { + "epoch": 0.84, + "learning_rate": 6.805948135141616e-05, + "loss": 1.7988, + "step": 4124 + }, + { + "epoch": 0.84, + "learning_rate": 6.78940162791441e-05, + "loss": 1.8271, + "step": 4125 + }, + { + "epoch": 0.84, + "learning_rate": 6.772873793766815e-05, + "loss": 1.9014, + "step": 4126 + }, + { + "epoch": 0.84, + "learning_rate": 6.756364639841218e-05, + "loss": 1.834, + "step": 4127 + }, + { + "epoch": 0.84, + "learning_rate": 6.739874173271882e-05, + "loss": 1.8965, + "step": 4128 + }, + { + "epoch": 0.84, + "learning_rate": 6.723402401185014e-05, + "loss": 1.917, + "step": 4129 + }, + { + "epoch": 0.84, + "learning_rate": 6.70694933069877e-05, + "loss": 1.8809, + "step": 4130 + }, + { + "epoch": 0.84, + "learning_rate": 6.690514968923167e-05, + "loss": 1.8945, + "step": 4131 + }, + { + "epoch": 0.84, + "learning_rate": 6.674099322960186e-05, + "loss": 1.7842, + "step": 4132 + }, + { + "epoch": 0.84, + "learning_rate": 6.65770239990372e-05, + "loss": 1.8174, + "step": 4133 + }, + { + "epoch": 0.84, + "learning_rate": 6.641324206839539e-05, + "loss": 1.9375, + "step": 4134 + }, + { + "epoch": 0.84, + "learning_rate": 6.624964750845325e-05, + "loss": 1.8594, + "step": 4135 + }, + { + "epoch": 0.84, + "learning_rate": 6.608624038990696e-05, + "loss": 1.9766, + "step": 4136 + }, + { + "epoch": 0.84, + "learning_rate": 6.59230207833713e-05, + "loss": 1.8135, + "step": 4137 + }, + { + "epoch": 0.84, + "learning_rate": 6.575998875938016e-05, + "loss": 1.8623, + "step": 4138 + }, + { + "epoch": 0.84, + "learning_rate": 6.559714438838644e-05, + "loss": 1.8613, + "step": 4139 + }, + { + "epoch": 0.84, + "learning_rate": 6.5434487740762e-05, + "loss": 1.874, + "step": 4140 + }, + { + "epoch": 0.84, + "learning_rate": 6.527201888679746e-05, + "loss": 1.916, + "step": 4141 + }, + { + "epoch": 0.84, + "learning_rate": 6.51097378967021e-05, + "loss": 1.8604, + "step": 4142 + }, + { + "epoch": 0.84, + "learning_rate": 6.494764484060444e-05, + "loss": 1.8682, + "step": 4143 + }, + { + "epoch": 0.84, + "learning_rate": 6.478573978855146e-05, + "loss": 1.8896, + "step": 4144 + }, + { + "epoch": 0.84, + "learning_rate": 6.4624022810509e-05, + "loss": 1.791, + "step": 4145 + }, + { + "epoch": 0.84, + "learning_rate": 6.446249397636172e-05, + "loss": 1.8818, + "step": 4146 + }, + { + "epoch": 0.84, + "learning_rate": 6.430115335591291e-05, + "loss": 1.8145, + "step": 4147 + }, + { + "epoch": 0.84, + "learning_rate": 6.414000101888428e-05, + "loss": 1.8506, + "step": 4148 + }, + { + "epoch": 0.84, + "learning_rate": 6.397903703491664e-05, + "loss": 1.8486, + "step": 4149 + }, + { + "epoch": 0.84, + "learning_rate": 6.381826147356907e-05, + "loss": 1.8438, + "step": 4150 + }, + { + "epoch": 0.84, + "learning_rate": 6.365767440431914e-05, + "loss": 1.877, + "step": 4151 + }, + { + "epoch": 0.84, + "learning_rate": 6.349727589656329e-05, + "loss": 1.8359, + "step": 4152 + }, + { + "epoch": 0.84, + "learning_rate": 6.333706601961642e-05, + "loss": 1.8066, + "step": 4153 + }, + { + "epoch": 0.84, + "learning_rate": 6.317704484271164e-05, + "loss": 1.873, + "step": 4154 + }, + { + "epoch": 0.84, + "learning_rate": 6.301721243500092e-05, + "loss": 1.8643, + "step": 4155 + }, + { + "epoch": 0.84, + "learning_rate": 6.285756886555422e-05, + "loss": 1.8594, + "step": 4156 + }, + { + "epoch": 0.84, + "learning_rate": 6.269811420336019e-05, + "loss": 1.7295, + "step": 4157 + }, + { + "epoch": 0.84, + "learning_rate": 6.25388485173256e-05, + "loss": 1.8164, + "step": 4158 + }, + { + "epoch": 0.84, + "learning_rate": 6.237977187627586e-05, + "loss": 1.917, + "step": 4159 + }, + { + "epoch": 0.84, + "learning_rate": 6.222088434895462e-05, + "loss": 1.8057, + "step": 4160 + }, + { + "epoch": 0.84, + "learning_rate": 6.206218600402352e-05, + "loss": 1.8994, + "step": 4161 + }, + { + "epoch": 0.84, + "learning_rate": 6.190367691006288e-05, + "loss": 1.8018, + "step": 4162 + }, + { + "epoch": 0.84, + "learning_rate": 6.174535713557089e-05, + "loss": 1.9492, + "step": 4163 + }, + { + "epoch": 0.85, + "learning_rate": 6.1587226748964e-05, + "loss": 1.9307, + "step": 4164 + }, + { + "epoch": 0.85, + "learning_rate": 6.142928581857693e-05, + "loss": 1.7812, + "step": 4165 + }, + { + "epoch": 0.85, + "learning_rate": 6.127153441266236e-05, + "loss": 1.9248, + "step": 4166 + }, + { + "epoch": 0.85, + "learning_rate": 6.111397259939128e-05, + "loss": 1.7568, + "step": 4167 + }, + { + "epoch": 0.85, + "learning_rate": 6.095660044685275e-05, + "loss": 1.9043, + "step": 4168 + }, + { + "epoch": 0.85, + "learning_rate": 6.079941802305361e-05, + "loss": 1.7744, + "step": 4169 + }, + { + "epoch": 0.85, + "learning_rate": 6.064242539591874e-05, + "loss": 1.8662, + "step": 4170 + }, + { + "epoch": 0.85, + "learning_rate": 6.048562263329138e-05, + "loss": 1.9658, + "step": 4171 + }, + { + "epoch": 0.85, + "learning_rate": 6.032900980293221e-05, + "loss": 1.9092, + "step": 4172 + }, + { + "epoch": 0.85, + "learning_rate": 6.017258697252037e-05, + "loss": 1.7451, + "step": 4173 + }, + { + "epoch": 0.85, + "learning_rate": 6.001635420965235e-05, + "loss": 1.7432, + "step": 4174 + }, + { + "epoch": 0.85, + "learning_rate": 5.986031158184296e-05, + "loss": 1.9326, + "step": 4175 + }, + { + "epoch": 0.85, + "learning_rate": 5.970445915652456e-05, + "loss": 1.7871, + "step": 4176 + }, + { + "epoch": 0.85, + "learning_rate": 5.954879700104732e-05, + "loss": 1.9434, + "step": 4177 + }, + { + "epoch": 0.85, + "learning_rate": 5.939332518267943e-05, + "loss": 1.918, + "step": 4178 + }, + { + "epoch": 0.85, + "learning_rate": 5.9238043768606474e-05, + "loss": 1.9062, + "step": 4179 + }, + { + "epoch": 0.85, + "learning_rate": 5.908295282593207e-05, + "loss": 1.7803, + "step": 4180 + }, + { + "epoch": 0.85, + "learning_rate": 5.892805242167748e-05, + "loss": 1.876, + "step": 4181 + }, + { + "epoch": 0.85, + "learning_rate": 5.87733426227815e-05, + "loss": 1.7881, + "step": 4182 + }, + { + "epoch": 0.85, + "learning_rate": 5.8618823496100426e-05, + "loss": 1.8643, + "step": 4183 + }, + { + "epoch": 0.85, + "learning_rate": 5.8464495108408625e-05, + "loss": 1.9014, + "step": 4184 + }, + { + "epoch": 0.85, + "learning_rate": 5.831035752639763e-05, + "loss": 1.9482, + "step": 4185 + }, + { + "epoch": 0.85, + "learning_rate": 5.81564108166765e-05, + "loss": 1.8564, + "step": 4186 + }, + { + "epoch": 0.85, + "learning_rate": 5.800265504577201e-05, + "loss": 1.8428, + "step": 4187 + }, + { + "epoch": 0.85, + "learning_rate": 5.784909028012858e-05, + "loss": 1.9805, + "step": 4188 + }, + { + "epoch": 0.85, + "learning_rate": 5.769571658610761e-05, + "loss": 1.8047, + "step": 4189 + }, + { + "epoch": 0.85, + "learning_rate": 5.7542534029988436e-05, + "loss": 1.8877, + "step": 4190 + }, + { + "epoch": 0.85, + "learning_rate": 5.7389542677967365e-05, + "loss": 1.8184, + "step": 4191 + }, + { + "epoch": 0.85, + "learning_rate": 5.72367425961583e-05, + "loss": 1.9238, + "step": 4192 + }, + { + "epoch": 0.85, + "learning_rate": 5.708413385059241e-05, + "loss": 1.9844, + "step": 4193 + }, + { + "epoch": 0.85, + "learning_rate": 5.6931716507218236e-05, + "loss": 1.8086, + "step": 4194 + }, + { + "epoch": 0.85, + "learning_rate": 5.677949063190169e-05, + "loss": 1.7568, + "step": 4195 + }, + { + "epoch": 0.85, + "learning_rate": 5.66274562904257e-05, + "loss": 1.9102, + "step": 4196 + }, + { + "epoch": 0.85, + "learning_rate": 5.647561354849079e-05, + "loss": 1.8711, + "step": 4197 + }, + { + "epoch": 0.85, + "learning_rate": 5.632396247171428e-05, + "loss": 1.8887, + "step": 4198 + }, + { + "epoch": 0.85, + "learning_rate": 5.617250312563082e-05, + "loss": 1.8418, + "step": 4199 + }, + { + "epoch": 0.85, + "learning_rate": 5.602123557569239e-05, + "loss": 1.9268, + "step": 4200 + }, + { + "epoch": 0.85, + "learning_rate": 5.587015988726774e-05, + "loss": 1.7549, + "step": 4201 + }, + { + "epoch": 0.85, + "learning_rate": 5.571927612564298e-05, + "loss": 1.9414, + "step": 4202 + }, + { + "epoch": 0.85, + "learning_rate": 5.55685843560213e-05, + "loss": 1.8135, + "step": 4203 + }, + { + "epoch": 0.85, + "learning_rate": 5.541808464352277e-05, + "loss": 1.9072, + "step": 4204 + }, + { + "epoch": 0.85, + "learning_rate": 5.526777705318442e-05, + "loss": 1.9746, + "step": 4205 + }, + { + "epoch": 0.85, + "learning_rate": 5.511766164996046e-05, + "loss": 1.96, + "step": 4206 + }, + { + "epoch": 0.85, + "learning_rate": 5.496773849872183e-05, + "loss": 1.8213, + "step": 4207 + }, + { + "epoch": 0.85, + "learning_rate": 5.4818007664256656e-05, + "loss": 1.8564, + "step": 4208 + }, + { + "epoch": 0.85, + "learning_rate": 5.466846921126961e-05, + "loss": 1.8516, + "step": 4209 + }, + { + "epoch": 0.85, + "learning_rate": 5.451912320438263e-05, + "loss": 1.8545, + "step": 4210 + }, + { + "epoch": 0.85, + "learning_rate": 5.436996970813418e-05, + "loss": 1.8076, + "step": 4211 + }, + { + "epoch": 0.85, + "learning_rate": 5.42210087869795e-05, + "loss": 1.8145, + "step": 4212 + }, + { + "epoch": 0.86, + "learning_rate": 5.407224050529097e-05, + "loss": 1.9375, + "step": 4213 + }, + { + "epoch": 0.86, + "learning_rate": 5.392366492735723e-05, + "loss": 1.8428, + "step": 4214 + }, + { + "epoch": 0.86, + "learning_rate": 5.3775282117384086e-05, + "loss": 1.9766, + "step": 4215 + }, + { + "epoch": 0.86, + "learning_rate": 5.362709213949396e-05, + "loss": 1.749, + "step": 4216 + }, + { + "epoch": 0.86, + "learning_rate": 5.347909505772569e-05, + "loss": 1.8721, + "step": 4217 + }, + { + "epoch": 0.86, + "learning_rate": 5.3331290936034895e-05, + "loss": 1.9082, + "step": 4218 + }, + { + "epoch": 0.86, + "learning_rate": 5.318367983829392e-05, + "loss": 1.8086, + "step": 4219 + }, + { + "epoch": 0.86, + "learning_rate": 5.303626182829158e-05, + "loss": 1.8711, + "step": 4220 + }, + { + "epoch": 0.86, + "learning_rate": 5.288903696973313e-05, + "loss": 1.9238, + "step": 4221 + }, + { + "epoch": 0.86, + "learning_rate": 5.274200532624057e-05, + "loss": 1.873, + "step": 4222 + }, + { + "epoch": 0.86, + "learning_rate": 5.2595166961352527e-05, + "loss": 1.7959, + "step": 4223 + }, + { + "epoch": 0.86, + "learning_rate": 5.244852193852373e-05, + "loss": 1.917, + "step": 4224 + }, + { + "epoch": 0.86, + "learning_rate": 5.23020703211255e-05, + "loss": 1.915, + "step": 4225 + }, + { + "epoch": 0.86, + "learning_rate": 5.2155812172445695e-05, + "loss": 1.8955, + "step": 4226 + }, + { + "epoch": 0.86, + "learning_rate": 5.200974755568849e-05, + "loss": 1.8398, + "step": 4227 + }, + { + "epoch": 0.86, + "learning_rate": 5.186387653397434e-05, + "loss": 1.8896, + "step": 4228 + }, + { + "epoch": 0.86, + "learning_rate": 5.1718199170340094e-05, + "loss": 1.8232, + "step": 4229 + }, + { + "epoch": 0.86, + "learning_rate": 5.157271552773918e-05, + "loss": 1.9385, + "step": 4230 + }, + { + "epoch": 0.86, + "learning_rate": 5.1427425669040795e-05, + "loss": 1.9404, + "step": 4231 + }, + { + "epoch": 0.86, + "learning_rate": 5.128232965703095e-05, + "loss": 1.8652, + "step": 4232 + }, + { + "epoch": 0.86, + "learning_rate": 5.1137427554411365e-05, + "loss": 1.75, + "step": 4233 + }, + { + "epoch": 0.86, + "learning_rate": 5.099271942380024e-05, + "loss": 1.8789, + "step": 4234 + }, + { + "epoch": 0.86, + "learning_rate": 5.084820532773199e-05, + "loss": 1.7793, + "step": 4235 + }, + { + "epoch": 0.86, + "learning_rate": 5.070388532865716e-05, + "loss": 1.8652, + "step": 4236 + }, + { + "epoch": 0.86, + "learning_rate": 5.055975948894226e-05, + "loss": 1.8604, + "step": 4237 + }, + { + "epoch": 0.86, + "learning_rate": 5.041582787087007e-05, + "loss": 1.8389, + "step": 4238 + }, + { + "epoch": 0.86, + "learning_rate": 5.027209053663939e-05, + "loss": 1.9043, + "step": 4239 + }, + { + "epoch": 0.86, + "learning_rate": 5.0128547548364876e-05, + "loss": 1.6826, + "step": 4240 + }, + { + "epoch": 0.86, + "learning_rate": 4.9985198968077626e-05, + "loss": 1.8555, + "step": 4241 + }, + { + "epoch": 0.86, + "learning_rate": 4.98420448577242e-05, + "loss": 1.9688, + "step": 4242 + }, + { + "epoch": 0.86, + "learning_rate": 4.9699085279167665e-05, + "loss": 1.8398, + "step": 4243 + }, + { + "epoch": 0.86, + "learning_rate": 4.955632029418644e-05, + "loss": 1.9648, + "step": 4244 + }, + { + "epoch": 0.86, + "learning_rate": 4.941374996447545e-05, + "loss": 1.916, + "step": 4245 + }, + { + "epoch": 0.86, + "learning_rate": 4.927137435164503e-05, + "loss": 1.834, + "step": 4246 + }, + { + "epoch": 0.86, + "learning_rate": 4.9129193517221504e-05, + "loss": 1.79, + "step": 4247 + }, + { + "epoch": 0.86, + "learning_rate": 4.898720752264729e-05, + "loss": 1.8369, + "step": 4248 + }, + { + "epoch": 0.86, + "learning_rate": 4.884541642928009e-05, + "loss": 1.9551, + "step": 4249 + }, + { + "epoch": 0.86, + "learning_rate": 4.870382029839387e-05, + "loss": 1.8916, + "step": 4250 + }, + { + "epoch": 0.86, + "learning_rate": 4.856241919117821e-05, + "loss": 1.9033, + "step": 4251 + }, + { + "epoch": 0.86, + "learning_rate": 4.842121316873821e-05, + "loss": 1.7773, + "step": 4252 + }, + { + "epoch": 0.86, + "learning_rate": 4.8280202292094844e-05, + "loss": 1.9121, + "step": 4253 + }, + { + "epoch": 0.86, + "learning_rate": 4.8139386622184755e-05, + "loss": 1.8994, + "step": 4254 + }, + { + "epoch": 0.86, + "learning_rate": 4.7998766219860214e-05, + "loss": 1.9111, + "step": 4255 + }, + { + "epoch": 0.86, + "learning_rate": 4.785834114588894e-05, + "loss": 1.8525, + "step": 4256 + }, + { + "epoch": 0.86, + "learning_rate": 4.7718111460954506e-05, + "loss": 1.9678, + "step": 4257 + }, + { + "epoch": 0.86, + "learning_rate": 4.757807722565605e-05, + "loss": 1.8701, + "step": 4258 + }, + { + "epoch": 0.86, + "learning_rate": 4.7438238500507994e-05, + "loss": 1.8281, + "step": 4259 + }, + { + "epoch": 0.86, + "learning_rate": 4.729859534594033e-05, + "loss": 1.7881, + "step": 4260 + }, + { + "epoch": 0.86, + "learning_rate": 4.7159147822298795e-05, + "loss": 1.8916, + "step": 4261 + }, + { + "epoch": 0.87, + "learning_rate": 4.7019895989844355e-05, + "loss": 1.8682, + "step": 4262 + }, + { + "epoch": 0.87, + "learning_rate": 4.688083990875336e-05, + "loss": 1.8467, + "step": 4263 + }, + { + "epoch": 0.87, + "learning_rate": 4.674197963911775e-05, + "loss": 1.8574, + "step": 4264 + }, + { + "epoch": 0.87, + "learning_rate": 4.660331524094491e-05, + "loss": 1.8398, + "step": 4265 + }, + { + "epoch": 0.87, + "learning_rate": 4.6464846774157135e-05, + "loss": 1.7881, + "step": 4266 + }, + { + "epoch": 0.87, + "learning_rate": 4.632657429859266e-05, + "loss": 1.7881, + "step": 4267 + }, + { + "epoch": 0.87, + "learning_rate": 4.6188497874004535e-05, + "loss": 1.8789, + "step": 4268 + }, + { + "epoch": 0.87, + "learning_rate": 4.6050617560061236e-05, + "loss": 1.8135, + "step": 4269 + }, + { + "epoch": 0.87, + "learning_rate": 4.591293341634661e-05, + "loss": 1.8193, + "step": 4270 + }, + { + "epoch": 0.87, + "learning_rate": 4.5775445502359736e-05, + "loss": 1.8066, + "step": 4271 + }, + { + "epoch": 0.87, + "learning_rate": 4.563815387751463e-05, + "loss": 1.8594, + "step": 4272 + }, + { + "epoch": 0.87, + "learning_rate": 4.5501058601140843e-05, + "loss": 1.8125, + "step": 4273 + }, + { + "epoch": 0.87, + "learning_rate": 4.5364159732482756e-05, + "loss": 1.8896, + "step": 4274 + }, + { + "epoch": 0.87, + "learning_rate": 4.522745733070005e-05, + "loss": 1.8701, + "step": 4275 + }, + { + "epoch": 0.87, + "learning_rate": 4.509095145486736e-05, + "loss": 1.9014, + "step": 4276 + }, + { + "epoch": 0.87, + "learning_rate": 4.49546421639746e-05, + "loss": 1.8223, + "step": 4277 + }, + { + "epoch": 0.87, + "learning_rate": 4.481852951692672e-05, + "loss": 1.8652, + "step": 4278 + }, + { + "epoch": 0.87, + "learning_rate": 4.468261357254338e-05, + "loss": 1.8438, + "step": 4279 + }, + { + "epoch": 0.87, + "learning_rate": 4.4546894389559725e-05, + "loss": 1.9209, + "step": 4280 + }, + { + "epoch": 0.87, + "learning_rate": 4.441137202662543e-05, + "loss": 1.8438, + "step": 4281 + }, + { + "epoch": 0.87, + "learning_rate": 4.427604654230527e-05, + "loss": 1.7793, + "step": 4282 + }, + { + "epoch": 0.87, + "learning_rate": 4.4140917995079076e-05, + "loss": 1.7803, + "step": 4283 + }, + { + "epoch": 0.87, + "learning_rate": 4.400598644334136e-05, + "loss": 1.6885, + "step": 4284 + }, + { + "epoch": 0.87, + "learning_rate": 4.3871251945401594e-05, + "loss": 1.8213, + "step": 4285 + }, + { + "epoch": 0.87, + "learning_rate": 4.3736714559484245e-05, + "loss": 1.8008, + "step": 4286 + }, + { + "epoch": 0.87, + "learning_rate": 4.360237434372838e-05, + "loss": 1.8975, + "step": 4287 + }, + { + "epoch": 0.87, + "learning_rate": 4.346823135618788e-05, + "loss": 1.8008, + "step": 4288 + }, + { + "epoch": 0.87, + "learning_rate": 4.333428565483155e-05, + "loss": 1.8369, + "step": 4289 + }, + { + "epoch": 0.87, + "learning_rate": 4.320053729754281e-05, + "loss": 1.8145, + "step": 4290 + }, + { + "epoch": 0.87, + "learning_rate": 4.306698634211975e-05, + "loss": 1.8008, + "step": 4291 + }, + { + "epoch": 0.87, + "learning_rate": 4.2933632846275326e-05, + "loss": 1.8594, + "step": 4292 + }, + { + "epoch": 0.87, + "learning_rate": 4.280047686763716e-05, + "loss": 1.7852, + "step": 4293 + }, + { + "epoch": 0.87, + "learning_rate": 4.2667518463747334e-05, + "loss": 1.8975, + "step": 4294 + }, + { + "epoch": 0.87, + "learning_rate": 4.253475769206255e-05, + "loss": 1.8164, + "step": 4295 + }, + { + "epoch": 0.87, + "learning_rate": 4.2402194609954425e-05, + "loss": 1.7861, + "step": 4296 + }, + { + "epoch": 0.87, + "learning_rate": 4.2269829274708735e-05, + "loss": 1.9775, + "step": 4297 + }, + { + "epoch": 0.87, + "learning_rate": 4.2137661743526065e-05, + "loss": 1.9199, + "step": 4298 + }, + { + "epoch": 0.87, + "learning_rate": 4.2005692073521614e-05, + "loss": 1.874, + "step": 4299 + }, + { + "epoch": 0.87, + "learning_rate": 4.187392032172471e-05, + "loss": 1.791, + "step": 4300 + }, + { + "epoch": 0.87, + "learning_rate": 4.17423465450793e-05, + "loss": 1.8496, + "step": 4301 + }, + { + "epoch": 0.87, + "learning_rate": 4.161097080044407e-05, + "loss": 1.8232, + "step": 4302 + }, + { + "epoch": 0.87, + "learning_rate": 4.147979314459177e-05, + "loss": 1.8643, + "step": 4303 + }, + { + "epoch": 0.87, + "learning_rate": 4.134881363420956e-05, + "loss": 1.834, + "step": 4304 + }, + { + "epoch": 0.87, + "learning_rate": 4.121803232589916e-05, + "loss": 1.7539, + "step": 4305 + }, + { + "epoch": 0.87, + "learning_rate": 4.1087449276176695e-05, + "loss": 1.9111, + "step": 4306 + }, + { + "epoch": 0.87, + "learning_rate": 4.09570645414723e-05, + "loss": 1.8633, + "step": 4307 + }, + { + "epoch": 0.87, + "learning_rate": 4.0826878178130744e-05, + "loss": 1.7334, + "step": 4308 + }, + { + "epoch": 0.87, + "learning_rate": 4.06968902424108e-05, + "loss": 1.8896, + "step": 4309 + }, + { + "epoch": 0.87, + "learning_rate": 4.05671007904857e-05, + "loss": 1.7666, + "step": 4310 + }, + { + "epoch": 0.87, + "learning_rate": 4.0437509878442635e-05, + "loss": 1.9619, + "step": 4311 + }, + { + "epoch": 0.88, + "learning_rate": 4.030811756228331e-05, + "loss": 1.7979, + "step": 4312 + }, + { + "epoch": 0.88, + "learning_rate": 4.0178923897923546e-05, + "loss": 1.9072, + "step": 4313 + }, + { + "epoch": 0.88, + "learning_rate": 4.004992894119303e-05, + "loss": 1.7285, + "step": 4314 + }, + { + "epoch": 0.88, + "learning_rate": 3.99211327478361e-05, + "loss": 1.8438, + "step": 4315 + }, + { + "epoch": 0.88, + "learning_rate": 3.979253537351068e-05, + "loss": 1.8213, + "step": 4316 + }, + { + "epoch": 0.88, + "learning_rate": 3.9664136873788946e-05, + "loss": 1.8291, + "step": 4317 + }, + { + "epoch": 0.88, + "learning_rate": 3.9535937304157446e-05, + "loss": 1.8154, + "step": 4318 + }, + { + "epoch": 0.88, + "learning_rate": 3.940793672001613e-05, + "loss": 1.8418, + "step": 4319 + }, + { + "epoch": 0.88, + "learning_rate": 3.928013517667961e-05, + "loss": 1.9004, + "step": 4320 + }, + { + "epoch": 0.88, + "learning_rate": 3.9152532729376136e-05, + "loss": 1.9434, + "step": 4321 + }, + { + "epoch": 0.88, + "learning_rate": 3.902512943324793e-05, + "loss": 1.7422, + "step": 4322 + }, + { + "epoch": 0.88, + "learning_rate": 3.889792534335118e-05, + "loss": 1.9082, + "step": 4323 + }, + { + "epoch": 0.88, + "learning_rate": 3.877092051465614e-05, + "loss": 1.8076, + "step": 4324 + }, + { + "epoch": 0.88, + "learning_rate": 3.864411500204679e-05, + "loss": 1.7725, + "step": 4325 + }, + { + "epoch": 0.88, + "learning_rate": 3.851750886032085e-05, + "loss": 1.7285, + "step": 4326 + }, + { + "epoch": 0.88, + "learning_rate": 3.839110214419017e-05, + "loss": 1.9307, + "step": 4327 + }, + { + "epoch": 0.88, + "learning_rate": 3.826489490828039e-05, + "loss": 1.9131, + "step": 4328 + }, + { + "epoch": 0.88, + "learning_rate": 3.8138887207130767e-05, + "loss": 1.9199, + "step": 4329 + }, + { + "epoch": 0.88, + "learning_rate": 3.8013079095194306e-05, + "loss": 1.9072, + "step": 4330 + }, + { + "epoch": 0.88, + "learning_rate": 3.788747062683812e-05, + "loss": 1.7725, + "step": 4331 + }, + { + "epoch": 0.88, + "learning_rate": 3.7762061856342567e-05, + "loss": 1.7334, + "step": 4332 + }, + { + "epoch": 0.88, + "learning_rate": 3.763685283790208e-05, + "loss": 1.8486, + "step": 4333 + }, + { + "epoch": 0.88, + "learning_rate": 3.7511843625624654e-05, + "loss": 1.877, + "step": 4334 + }, + { + "epoch": 0.88, + "learning_rate": 3.738703427353191e-05, + "loss": 1.8506, + "step": 4335 + }, + { + "epoch": 0.88, + "learning_rate": 3.7262424835558913e-05, + "loss": 1.8418, + "step": 4336 + }, + { + "epoch": 0.88, + "learning_rate": 3.713801536555483e-05, + "loss": 1.7939, + "step": 4337 + }, + { + "epoch": 0.88, + "learning_rate": 3.7013805917281985e-05, + "loss": 1.8633, + "step": 4338 + }, + { + "epoch": 0.88, + "learning_rate": 3.688979654441627e-05, + "loss": 1.7783, + "step": 4339 + }, + { + "epoch": 0.88, + "learning_rate": 3.6765987300547366e-05, + "loss": 1.7686, + "step": 4340 + }, + { + "epoch": 0.88, + "learning_rate": 3.664237823917843e-05, + "loss": 1.8564, + "step": 4341 + }, + { + "epoch": 0.88, + "learning_rate": 3.65189694137259e-05, + "loss": 1.7852, + "step": 4342 + }, + { + "epoch": 0.88, + "learning_rate": 3.639576087751989e-05, + "loss": 1.8623, + "step": 4343 + }, + { + "epoch": 0.88, + "learning_rate": 3.627275268380392e-05, + "loss": 1.8018, + "step": 4344 + }, + { + "epoch": 0.88, + "learning_rate": 3.6149944885734755e-05, + "loss": 1.9365, + "step": 4345 + }, + { + "epoch": 0.88, + "learning_rate": 3.602733753638277e-05, + "loss": 1.877, + "step": 4346 + }, + { + "epoch": 0.88, + "learning_rate": 3.590493068873163e-05, + "loss": 1.8818, + "step": 4347 + }, + { + "epoch": 0.88, + "learning_rate": 3.5782724395678476e-05, + "loss": 1.8662, + "step": 4348 + }, + { + "epoch": 0.88, + "learning_rate": 3.566071871003351e-05, + "loss": 1.7959, + "step": 4349 + }, + { + "epoch": 0.88, + "learning_rate": 3.553891368452061e-05, + "loss": 1.9053, + "step": 4350 + }, + { + "epoch": 0.88, + "learning_rate": 3.5417309371776674e-05, + "loss": 1.8691, + "step": 4351 + }, + { + "epoch": 0.88, + "learning_rate": 3.5295905824351826e-05, + "loss": 1.7979, + "step": 4352 + }, + { + "epoch": 0.88, + "learning_rate": 3.517470309470972e-05, + "loss": 1.835, + "step": 4353 + }, + { + "epoch": 0.88, + "learning_rate": 3.5053701235226886e-05, + "loss": 1.8242, + "step": 4354 + }, + { + "epoch": 0.88, + "learning_rate": 3.493290029819324e-05, + "loss": 1.793, + "step": 4355 + }, + { + "epoch": 0.88, + "learning_rate": 3.481230033581201e-05, + "loss": 1.7334, + "step": 4356 + }, + { + "epoch": 0.88, + "learning_rate": 3.469190140019923e-05, + "loss": 1.8691, + "step": 4357 + }, + { + "epoch": 0.88, + "learning_rate": 3.4571703543384295e-05, + "loss": 1.8867, + "step": 4358 + }, + { + "epoch": 0.88, + "learning_rate": 3.445170681730964e-05, + "loss": 1.9443, + "step": 4359 + }, + { + "epoch": 0.88, + "learning_rate": 3.433191127383078e-05, + "loss": 1.8799, + "step": 4360 + }, + { + "epoch": 0.89, + "learning_rate": 3.4212316964716386e-05, + "loss": 1.8369, + "step": 4361 + }, + { + "epoch": 0.89, + "learning_rate": 3.4092923941647926e-05, + "loss": 1.8896, + "step": 4362 + }, + { + "epoch": 0.89, + "learning_rate": 3.397373225622025e-05, + "loss": 1.7812, + "step": 4363 + }, + { + "epoch": 0.89, + "learning_rate": 3.3854741959940926e-05, + "loss": 1.8457, + "step": 4364 + }, + { + "epoch": 0.89, + "learning_rate": 3.373595310423039e-05, + "loss": 1.8965, + "step": 4365 + }, + { + "epoch": 0.89, + "learning_rate": 3.361736574042246e-05, + "loss": 1.7334, + "step": 4366 + }, + { + "epoch": 0.89, + "learning_rate": 3.349897991976336e-05, + "loss": 1.7959, + "step": 4367 + }, + { + "epoch": 0.89, + "learning_rate": 3.338079569341268e-05, + "loss": 1.8535, + "step": 4368 + }, + { + "epoch": 0.89, + "learning_rate": 3.3262813112442667e-05, + "loss": 1.8311, + "step": 4369 + }, + { + "epoch": 0.89, + "learning_rate": 3.3145032227838435e-05, + "loss": 1.957, + "step": 4370 + }, + { + "epoch": 0.89, + "learning_rate": 3.3027453090497874e-05, + "loss": 1.8213, + "step": 4371 + }, + { + "epoch": 0.89, + "learning_rate": 3.2910075751231895e-05, + "loss": 1.8389, + "step": 4372 + }, + { + "epoch": 0.89, + "learning_rate": 3.2792900260764035e-05, + "loss": 1.7861, + "step": 4373 + }, + { + "epoch": 0.89, + "learning_rate": 3.267592666973057e-05, + "loss": 1.8105, + "step": 4374 + }, + { + "epoch": 0.89, + "learning_rate": 3.255915502868062e-05, + "loss": 1.9258, + "step": 4375 + }, + { + "epoch": 0.89, + "learning_rate": 3.244258538807621e-05, + "loss": 2.0205, + "step": 4376 + }, + { + "epoch": 0.89, + "learning_rate": 3.232621779829176e-05, + "loss": 1.8838, + "step": 4377 + }, + { + "epoch": 0.89, + "learning_rate": 3.2210052309614436e-05, + "loss": 1.8936, + "step": 4378 + }, + { + "epoch": 0.89, + "learning_rate": 3.209408897224419e-05, + "loss": 1.8262, + "step": 4379 + }, + { + "epoch": 0.89, + "learning_rate": 3.197832783629362e-05, + "loss": 1.9365, + "step": 4380 + }, + { + "epoch": 0.89, + "learning_rate": 3.186276895178774e-05, + "loss": 1.7373, + "step": 4381 + }, + { + "epoch": 0.89, + "learning_rate": 3.1747412368664354e-05, + "loss": 1.8447, + "step": 4382 + }, + { + "epoch": 0.89, + "learning_rate": 3.16322581367739e-05, + "loss": 1.8857, + "step": 4383 + }, + { + "epoch": 0.89, + "learning_rate": 3.151730630587912e-05, + "loss": 1.915, + "step": 4384 + }, + { + "epoch": 0.89, + "learning_rate": 3.140255692565558e-05, + "loss": 1.8018, + "step": 4385 + }, + { + "epoch": 0.89, + "learning_rate": 3.1288010045691095e-05, + "loss": 1.7686, + "step": 4386 + }, + { + "epoch": 0.89, + "learning_rate": 3.1173665715486075e-05, + "loss": 1.7578, + "step": 4387 + }, + { + "epoch": 0.89, + "learning_rate": 3.10595239844535e-05, + "loss": 1.8242, + "step": 4388 + }, + { + "epoch": 0.89, + "learning_rate": 3.094558490191873e-05, + "loss": 1.748, + "step": 4389 + }, + { + "epoch": 0.89, + "learning_rate": 3.083184851711945e-05, + "loss": 1.9355, + "step": 4390 + }, + { + "epoch": 0.89, + "learning_rate": 3.0718314879205954e-05, + "loss": 1.8174, + "step": 4391 + }, + { + "epoch": 0.89, + "learning_rate": 3.060498403724071e-05, + "loss": 1.8389, + "step": 4392 + }, + { + "epoch": 0.89, + "learning_rate": 3.0491856040198606e-05, + "loss": 1.9336, + "step": 4393 + }, + { + "epoch": 0.89, + "learning_rate": 3.037893093696703e-05, + "loss": 1.8145, + "step": 4394 + }, + { + "epoch": 0.89, + "learning_rate": 3.0266208776345395e-05, + "loss": 1.8555, + "step": 4395 + }, + { + "epoch": 0.89, + "learning_rate": 3.0153689607045842e-05, + "loss": 1.8008, + "step": 4396 + }, + { + "epoch": 0.89, + "learning_rate": 3.0041373477692314e-05, + "loss": 1.9883, + "step": 4397 + }, + { + "epoch": 0.89, + "learning_rate": 2.992926043682137e-05, + "loss": 1.8281, + "step": 4398 + }, + { + "epoch": 0.89, + "learning_rate": 2.9817350532881648e-05, + "loss": 1.833, + "step": 4399 + }, + { + "epoch": 0.89, + "learning_rate": 2.9705643814233907e-05, + "loss": 1.8506, + "step": 4400 + }, + { + "epoch": 0.89, + "learning_rate": 2.959414032915142e-05, + "loss": 1.8936, + "step": 4401 + }, + { + "epoch": 0.89, + "learning_rate": 2.9482840125819254e-05, + "loss": 1.9287, + "step": 4402 + }, + { + "epoch": 0.89, + "learning_rate": 2.9371743252334936e-05, + "loss": 1.8916, + "step": 4403 + }, + { + "epoch": 0.89, + "learning_rate": 2.9260849756708007e-05, + "loss": 1.8564, + "step": 4404 + }, + { + "epoch": 0.89, + "learning_rate": 2.9150159686860022e-05, + "loss": 1.8428, + "step": 4405 + }, + { + "epoch": 0.89, + "learning_rate": 2.9039673090624775e-05, + "loss": 1.8721, + "step": 4406 + }, + { + "epoch": 0.89, + "learning_rate": 2.8929390015748125e-05, + "loss": 1.9053, + "step": 4407 + }, + { + "epoch": 0.89, + "learning_rate": 2.881931050988784e-05, + "loss": 1.9111, + "step": 4408 + }, + { + "epoch": 0.89, + "learning_rate": 2.8709434620613762e-05, + "loss": 1.9131, + "step": 4409 + }, + { + "epoch": 0.9, + "learning_rate": 2.85997623954079e-05, + "loss": 1.958, + "step": 4410 + }, + { + "epoch": 0.9, + "learning_rate": 2.8490293881664186e-05, + "loss": 1.832, + "step": 4411 + }, + { + "epoch": 0.9, + "learning_rate": 2.8381029126688384e-05, + "loss": 1.7461, + "step": 4412 + }, + { + "epoch": 0.9, + "learning_rate": 2.8271968177698282e-05, + "loss": 1.875, + "step": 4413 + }, + { + "epoch": 0.9, + "learning_rate": 2.816311108182368e-05, + "loss": 1.8564, + "step": 4414 + }, + { + "epoch": 0.9, + "learning_rate": 2.8054457886106167e-05, + "loss": 1.9717, + "step": 4415 + }, + { + "epoch": 0.9, + "learning_rate": 2.7946008637499244e-05, + "loss": 1.8613, + "step": 4416 + }, + { + "epoch": 0.9, + "learning_rate": 2.7837763382868252e-05, + "loss": 1.8057, + "step": 4417 + }, + { + "epoch": 0.9, + "learning_rate": 2.772972216899061e-05, + "loss": 1.8721, + "step": 4418 + }, + { + "epoch": 0.9, + "learning_rate": 2.7621885042555196e-05, + "loss": 1.917, + "step": 4419 + }, + { + "epoch": 0.9, + "learning_rate": 2.7514252050163003e-05, + "loss": 1.877, + "step": 4420 + }, + { + "epoch": 0.9, + "learning_rate": 2.740682323832666e-05, + "loss": 1.707, + "step": 4421 + }, + { + "epoch": 0.9, + "learning_rate": 2.7299598653470425e-05, + "loss": 1.8652, + "step": 4422 + }, + { + "epoch": 0.9, + "learning_rate": 2.7192578341930617e-05, + "loss": 1.7959, + "step": 4423 + }, + { + "epoch": 0.9, + "learning_rate": 2.7085762349955135e-05, + "loss": 1.7783, + "step": 4424 + }, + { + "epoch": 0.9, + "learning_rate": 2.697915072370344e-05, + "loss": 1.875, + "step": 4425 + }, + { + "epoch": 0.9, + "learning_rate": 2.6872743509246954e-05, + "loss": 1.7324, + "step": 4426 + }, + { + "epoch": 0.9, + "learning_rate": 2.6766540752568514e-05, + "loss": 1.7871, + "step": 4427 + }, + { + "epoch": 0.9, + "learning_rate": 2.6660542499562788e-05, + "loss": 1.8916, + "step": 4428 + }, + { + "epoch": 0.9, + "learning_rate": 2.6554748796035865e-05, + "loss": 1.917, + "step": 4429 + }, + { + "epoch": 0.9, + "learning_rate": 2.6449159687705615e-05, + "loss": 1.8223, + "step": 4430 + }, + { + "epoch": 0.9, + "learning_rate": 2.6343775220201537e-05, + "loss": 1.7842, + "step": 4431 + }, + { + "epoch": 0.9, + "learning_rate": 2.6238595439064485e-05, + "loss": 1.8721, + "step": 4432 + }, + { + "epoch": 0.9, + "learning_rate": 2.6133620389747094e-05, + "loss": 1.8301, + "step": 4433 + }, + { + "epoch": 0.9, + "learning_rate": 2.60288501176133e-05, + "loss": 1.8223, + "step": 4434 + }, + { + "epoch": 0.9, + "learning_rate": 2.5924284667938613e-05, + "loss": 1.792, + "step": 4435 + }, + { + "epoch": 0.9, + "learning_rate": 2.5819924085910217e-05, + "loss": 1.915, + "step": 4436 + }, + { + "epoch": 0.9, + "learning_rate": 2.5715768416626485e-05, + "loss": 1.8711, + "step": 4437 + }, + { + "epoch": 0.9, + "learning_rate": 2.5611817705097406e-05, + "loss": 1.8066, + "step": 4438 + }, + { + "epoch": 0.9, + "learning_rate": 2.5508071996244497e-05, + "loss": 1.9229, + "step": 4439 + }, + { + "epoch": 0.9, + "learning_rate": 2.5404531334900448e-05, + "loss": 1.9453, + "step": 4440 + }, + { + "epoch": 0.9, + "learning_rate": 2.530119576580936e-05, + "loss": 1.8438, + "step": 4441 + }, + { + "epoch": 0.9, + "learning_rate": 2.519806533362695e-05, + "loss": 1.8486, + "step": 4442 + }, + { + "epoch": 0.9, + "learning_rate": 2.5095140082920022e-05, + "loss": 1.7246, + "step": 4443 + }, + { + "epoch": 0.9, + "learning_rate": 2.4992420058166765e-05, + "loss": 1.9385, + "step": 4444 + }, + { + "epoch": 0.9, + "learning_rate": 2.4889905303756844e-05, + "loss": 1.8184, + "step": 4445 + }, + { + "epoch": 0.9, + "learning_rate": 2.4787595863991153e-05, + "loss": 1.8809, + "step": 4446 + }, + { + "epoch": 0.9, + "learning_rate": 2.4685491783081714e-05, + "loss": 1.8555, + "step": 4447 + }, + { + "epoch": 0.9, + "learning_rate": 2.4583593105151846e-05, + "loss": 1.8867, + "step": 4448 + }, + { + "epoch": 0.9, + "learning_rate": 2.4481899874236323e-05, + "loss": 1.9111, + "step": 4449 + }, + { + "epoch": 0.9, + "learning_rate": 2.4380412134280883e-05, + "loss": 1.8018, + "step": 4450 + }, + { + "epoch": 0.9, + "learning_rate": 2.427912992914255e-05, + "loss": 1.8525, + "step": 4451 + }, + { + "epoch": 0.9, + "learning_rate": 2.4178053302589598e-05, + "loss": 1.8457, + "step": 4452 + }, + { + "epoch": 0.9, + "learning_rate": 2.4077182298301414e-05, + "loss": 1.96, + "step": 4453 + }, + { + "epoch": 0.9, + "learning_rate": 2.3976516959868412e-05, + "loss": 1.8545, + "step": 4454 + }, + { + "epoch": 0.9, + "learning_rate": 2.3876057330792345e-05, + "loss": 1.8018, + "step": 4455 + }, + { + "epoch": 0.9, + "learning_rate": 2.3775803454485935e-05, + "loss": 1.7871, + "step": 4456 + }, + { + "epoch": 0.9, + "learning_rate": 2.3675755374272856e-05, + "loss": 1.8818, + "step": 4457 + }, + { + "epoch": 0.9, + "learning_rate": 2.3575913133388195e-05, + "loss": 1.8779, + "step": 4458 + }, + { + "epoch": 0.91, + "learning_rate": 2.347627677497788e-05, + "loss": 1.8604, + "step": 4459 + }, + { + "epoch": 0.91, + "learning_rate": 2.3376846342098744e-05, + "loss": 1.8135, + "step": 4460 + }, + { + "epoch": 0.91, + "learning_rate": 2.3277621877718924e-05, + "loss": 1.9072, + "step": 4461 + }, + { + "epoch": 0.91, + "learning_rate": 2.3178603424717394e-05, + "loss": 1.8057, + "step": 4462 + }, + { + "epoch": 0.91, + "learning_rate": 2.3079791025884033e-05, + "loss": 1.876, + "step": 4463 + }, + { + "epoch": 0.91, + "learning_rate": 2.298118472391969e-05, + "loss": 1.9736, + "step": 4464 + }, + { + "epoch": 0.91, + "learning_rate": 2.2882784561436277e-05, + "loss": 1.7578, + "step": 4465 + }, + { + "epoch": 0.91, + "learning_rate": 2.2784590580956664e-05, + "loss": 1.8447, + "step": 4466 + }, + { + "epoch": 0.91, + "learning_rate": 2.268660282491436e-05, + "loss": 1.8311, + "step": 4467 + }, + { + "epoch": 0.91, + "learning_rate": 2.258882133565404e-05, + "loss": 1.8965, + "step": 4468 + }, + { + "epoch": 0.91, + "learning_rate": 2.2491246155431076e-05, + "loss": 1.8916, + "step": 4469 + }, + { + "epoch": 0.91, + "learning_rate": 2.2393877326411573e-05, + "loss": 1.9033, + "step": 4470 + }, + { + "epoch": 0.91, + "learning_rate": 2.2296714890672808e-05, + "loss": 1.8906, + "step": 4471 + }, + { + "epoch": 0.91, + "learning_rate": 2.2199758890202594e-05, + "loss": 1.8799, + "step": 4472 + }, + { + "epoch": 0.91, + "learning_rate": 2.2103009366899575e-05, + "loss": 1.9609, + "step": 4473 + }, + { + "epoch": 0.91, + "learning_rate": 2.2006466362573307e-05, + "loss": 1.8184, + "step": 4474 + }, + { + "epoch": 0.91, + "learning_rate": 2.191012991894392e-05, + "loss": 1.9629, + "step": 4475 + }, + { + "epoch": 0.91, + "learning_rate": 2.1814000077642338e-05, + "loss": 1.8096, + "step": 4476 + }, + { + "epoch": 0.91, + "learning_rate": 2.1718076880210326e-05, + "loss": 1.8379, + "step": 4477 + }, + { + "epoch": 0.91, + "learning_rate": 2.1622360368100226e-05, + "loss": 1.8779, + "step": 4478 + }, + { + "epoch": 0.91, + "learning_rate": 2.152685058267495e-05, + "loss": 1.7227, + "step": 4479 + }, + { + "epoch": 0.91, + "learning_rate": 2.143154756520832e-05, + "loss": 1.8027, + "step": 4480 + }, + { + "epoch": 0.91, + "learning_rate": 2.133645135688478e-05, + "loss": 1.8008, + "step": 4481 + }, + { + "epoch": 0.91, + "learning_rate": 2.1241561998799174e-05, + "loss": 1.7686, + "step": 4482 + }, + { + "epoch": 0.91, + "learning_rate": 2.1146879531957153e-05, + "loss": 1.8877, + "step": 4483 + }, + { + "epoch": 0.91, + "learning_rate": 2.105240399727493e-05, + "loss": 1.7969, + "step": 4484 + }, + { + "epoch": 0.91, + "learning_rate": 2.095813543557923e-05, + "loss": 1.7812, + "step": 4485 + }, + { + "epoch": 0.91, + "learning_rate": 2.0864073887607416e-05, + "loss": 1.8193, + "step": 4486 + }, + { + "epoch": 0.91, + "learning_rate": 2.0770219394007362e-05, + "loss": 1.8115, + "step": 4487 + }, + { + "epoch": 0.91, + "learning_rate": 2.0676571995337512e-05, + "loss": 1.8789, + "step": 4488 + }, + { + "epoch": 0.91, + "learning_rate": 2.0583131732066606e-05, + "loss": 1.9131, + "step": 4489 + }, + { + "epoch": 0.91, + "learning_rate": 2.048989864457418e-05, + "loss": 1.7998, + "step": 4490 + }, + { + "epoch": 0.91, + "learning_rate": 2.039687277315011e-05, + "loss": 1.8867, + "step": 4491 + }, + { + "epoch": 0.91, + "learning_rate": 2.0304054157994523e-05, + "loss": 1.8105, + "step": 4492 + }, + { + "epoch": 0.91, + "learning_rate": 2.0211442839218275e-05, + "loss": 1.7578, + "step": 4493 + }, + { + "epoch": 0.91, + "learning_rate": 2.011903885684263e-05, + "loss": 1.9268, + "step": 4494 + }, + { + "epoch": 0.91, + "learning_rate": 2.0026842250799037e-05, + "loss": 1.7891, + "step": 4495 + }, + { + "epoch": 0.91, + "learning_rate": 1.9934853060929458e-05, + "loss": 1.9775, + "step": 4496 + }, + { + "epoch": 0.91, + "learning_rate": 1.984307132698626e-05, + "loss": 1.8115, + "step": 4497 + }, + { + "epoch": 0.91, + "learning_rate": 1.9751497088632054e-05, + "loss": 1.915, + "step": 4498 + }, + { + "epoch": 0.91, + "learning_rate": 1.9660130385439846e-05, + "loss": 1.9023, + "step": 4499 + }, + { + "epoch": 0.91, + "learning_rate": 1.9568971256892942e-05, + "loss": 1.7969, + "step": 4500 + }, + { + "epoch": 0.91, + "learning_rate": 1.947801974238511e-05, + "loss": 1.8193, + "step": 4501 + }, + { + "epoch": 0.91, + "learning_rate": 1.9387275881220022e-05, + "loss": 1.873, + "step": 4502 + }, + { + "epoch": 0.91, + "learning_rate": 1.9296739712611977e-05, + "loss": 1.835, + "step": 4503 + }, + { + "epoch": 0.91, + "learning_rate": 1.92064112756854e-05, + "loss": 1.8867, + "step": 4504 + }, + { + "epoch": 0.91, + "learning_rate": 1.9116290609474786e-05, + "loss": 1.8467, + "step": 4505 + }, + { + "epoch": 0.91, + "learning_rate": 1.9026377752925095e-05, + "loss": 1.8535, + "step": 4506 + }, + { + "epoch": 0.91, + "learning_rate": 1.893667274489136e-05, + "loss": 1.9307, + "step": 4507 + }, + { + "epoch": 0.91, + "learning_rate": 1.884717562413879e-05, + "loss": 1.8926, + "step": 4508 + }, + { + "epoch": 0.92, + "learning_rate": 1.8757886429342895e-05, + "loss": 1.8574, + "step": 4509 + }, + { + "epoch": 0.92, + "learning_rate": 1.866880519908909e-05, + "loss": 1.9375, + "step": 4510 + }, + { + "epoch": 0.92, + "learning_rate": 1.8579931971872966e-05, + "loss": 1.8545, + "step": 4511 + }, + { + "epoch": 0.92, + "learning_rate": 1.8491266786100525e-05, + "loss": 1.7656, + "step": 4512 + }, + { + "epoch": 0.92, + "learning_rate": 1.840280968008745e-05, + "loss": 1.7705, + "step": 4513 + }, + { + "epoch": 0.92, + "learning_rate": 1.8314560692059833e-05, + "loss": 1.9014, + "step": 4514 + }, + { + "epoch": 0.92, + "learning_rate": 1.822651986015361e-05, + "loss": 1.8516, + "step": 4515 + }, + { + "epoch": 0.92, + "learning_rate": 1.8138687222414962e-05, + "loss": 1.8047, + "step": 4516 + }, + { + "epoch": 0.92, + "learning_rate": 1.8051062816799912e-05, + "loss": 1.8193, + "step": 4517 + }, + { + "epoch": 0.92, + "learning_rate": 1.796364668117445e-05, + "loss": 1.875, + "step": 4518 + }, + { + "epoch": 0.92, + "learning_rate": 1.7876438853314968e-05, + "loss": 1.8086, + "step": 4519 + }, + { + "epoch": 0.92, + "learning_rate": 1.7789439370907325e-05, + "loss": 1.8408, + "step": 4520 + }, + { + "epoch": 0.92, + "learning_rate": 1.770264827154766e-05, + "loss": 1.7646, + "step": 4521 + }, + { + "epoch": 0.92, + "learning_rate": 1.7616065592742035e-05, + "loss": 1.793, + "step": 4522 + }, + { + "epoch": 0.92, + "learning_rate": 1.7529691371906354e-05, + "loss": 1.7539, + "step": 4523 + }, + { + "epoch": 0.92, + "learning_rate": 1.744352564636642e-05, + "loss": 1.915, + "step": 4524 + }, + { + "epoch": 0.92, + "learning_rate": 1.735756845335812e-05, + "loss": 1.7832, + "step": 4525 + }, + { + "epoch": 0.92, + "learning_rate": 1.7271819830026957e-05, + "loss": 1.8086, + "step": 4526 + }, + { + "epoch": 0.92, + "learning_rate": 1.718627981342852e-05, + "loss": 1.8604, + "step": 4527 + }, + { + "epoch": 0.92, + "learning_rate": 1.710094844052812e-05, + "loss": 1.9727, + "step": 4528 + }, + { + "epoch": 0.92, + "learning_rate": 1.701582574820104e-05, + "loss": 1.8564, + "step": 4529 + }, + { + "epoch": 0.92, + "learning_rate": 1.6930911773232306e-05, + "loss": 1.9082, + "step": 4530 + }, + { + "epoch": 0.92, + "learning_rate": 1.6846206552316613e-05, + "loss": 1.915, + "step": 4531 + }, + { + "epoch": 0.92, + "learning_rate": 1.676171012205874e-05, + "loss": 1.8174, + "step": 4532 + }, + { + "epoch": 0.92, + "learning_rate": 1.667742251897303e-05, + "loss": 1.8564, + "step": 4533 + }, + { + "epoch": 0.92, + "learning_rate": 1.6593343779483517e-05, + "loss": 1.8408, + "step": 4534 + }, + { + "epoch": 0.92, + "learning_rate": 1.6509473939924135e-05, + "loss": 1.8135, + "step": 4535 + }, + { + "epoch": 0.92, + "learning_rate": 1.6425813036538616e-05, + "loss": 1.8857, + "step": 4536 + }, + { + "epoch": 0.92, + "learning_rate": 1.6342361105480096e-05, + "loss": 1.7676, + "step": 4537 + }, + { + "epoch": 0.92, + "learning_rate": 1.6259118182811785e-05, + "loss": 1.8164, + "step": 4538 + }, + { + "epoch": 0.92, + "learning_rate": 1.617608430450629e-05, + "loss": 1.8525, + "step": 4539 + }, + { + "epoch": 0.92, + "learning_rate": 1.609325950644591e-05, + "loss": 1.8848, + "step": 4540 + }, + { + "epoch": 0.92, + "learning_rate": 1.60106438244228e-05, + "loss": 1.7812, + "step": 4541 + }, + { + "epoch": 0.92, + "learning_rate": 1.5928237294138394e-05, + "loss": 1.8945, + "step": 4542 + }, + { + "epoch": 0.92, + "learning_rate": 1.5846039951204095e-05, + "loss": 1.7861, + "step": 4543 + }, + { + "epoch": 0.92, + "learning_rate": 1.576405183114077e-05, + "loss": 1.8105, + "step": 4544 + }, + { + "epoch": 0.92, + "learning_rate": 1.568227296937885e-05, + "loss": 1.8818, + "step": 4545 + }, + { + "epoch": 0.92, + "learning_rate": 1.5600703401258297e-05, + "loss": 1.8486, + "step": 4546 + }, + { + "epoch": 0.92, + "learning_rate": 1.551934316202869e-05, + "loss": 1.9404, + "step": 4547 + }, + { + "epoch": 0.92, + "learning_rate": 1.543819228684912e-05, + "loss": 1.8877, + "step": 4548 + }, + { + "epoch": 0.92, + "learning_rate": 1.5357250810788315e-05, + "loss": 1.8535, + "step": 4549 + }, + { + "epoch": 0.92, + "learning_rate": 1.52765187688243e-05, + "loss": 1.9316, + "step": 4550 + }, + { + "epoch": 0.92, + "learning_rate": 1.5195996195844885e-05, + "loss": 1.8506, + "step": 4551 + }, + { + "epoch": 0.92, + "learning_rate": 1.5115683126647072e-05, + "loss": 2.0049, + "step": 4552 + }, + { + "epoch": 0.92, + "learning_rate": 1.5035579595937377e-05, + "loss": 1.9131, + "step": 4553 + }, + { + "epoch": 0.92, + "learning_rate": 1.4955685638331995e-05, + "loss": 1.8301, + "step": 4554 + }, + { + "epoch": 0.92, + "learning_rate": 1.4876001288356311e-05, + "loss": 1.8838, + "step": 4555 + }, + { + "epoch": 0.92, + "learning_rate": 1.479652658044517e-05, + "loss": 1.8906, + "step": 4556 + }, + { + "epoch": 0.92, + "learning_rate": 1.471726154894304e-05, + "loss": 1.8154, + "step": 4557 + }, + { + "epoch": 0.93, + "learning_rate": 1.4638206228103413e-05, + "loss": 1.8457, + "step": 4558 + }, + { + "epoch": 0.93, + "learning_rate": 1.4559360652089404e-05, + "loss": 1.8809, + "step": 4559 + }, + { + "epoch": 0.93, + "learning_rate": 1.4480724854973536e-05, + "loss": 1.8047, + "step": 4560 + }, + { + "epoch": 0.93, + "learning_rate": 1.4402298870737517e-05, + "loss": 1.8633, + "step": 4561 + }, + { + "epoch": 0.93, + "learning_rate": 1.4324082733272348e-05, + "loss": 1.7119, + "step": 4562 + }, + { + "epoch": 0.93, + "learning_rate": 1.4246076476378489e-05, + "loss": 1.9268, + "step": 4563 + }, + { + "epoch": 0.93, + "learning_rate": 1.416828013376581e-05, + "loss": 1.8926, + "step": 4564 + }, + { + "epoch": 0.93, + "learning_rate": 1.409069373905314e-05, + "loss": 1.8193, + "step": 4565 + }, + { + "epoch": 0.93, + "learning_rate": 1.4013317325768826e-05, + "loss": 1.8896, + "step": 4566 + }, + { + "epoch": 0.93, + "learning_rate": 1.3936150927350399e-05, + "loss": 1.8193, + "step": 4567 + }, + { + "epoch": 0.93, + "learning_rate": 1.385919457714463e-05, + "loss": 1.8926, + "step": 4568 + }, + { + "epoch": 0.93, + "learning_rate": 1.378244830840747e-05, + "loss": 1.9102, + "step": 4569 + }, + { + "epoch": 0.93, + "learning_rate": 1.3705912154304224e-05, + "loss": 1.8867, + "step": 4570 + }, + { + "epoch": 0.93, + "learning_rate": 1.3629586147909323e-05, + "loss": 1.8516, + "step": 4571 + }, + { + "epoch": 0.93, + "learning_rate": 1.355347032220633e-05, + "loss": 1.8623, + "step": 4572 + }, + { + "epoch": 0.93, + "learning_rate": 1.3477564710088097e-05, + "loss": 1.8652, + "step": 4573 + }, + { + "epoch": 0.93, + "learning_rate": 1.34018693443565e-05, + "loss": 1.8496, + "step": 4574 + }, + { + "epoch": 0.93, + "learning_rate": 1.3326384257722645e-05, + "loss": 1.8652, + "step": 4575 + }, + { + "epoch": 0.93, + "learning_rate": 1.3251109482806666e-05, + "loss": 1.9219, + "step": 4576 + }, + { + "epoch": 0.93, + "learning_rate": 1.3176045052138097e-05, + "loss": 1.7881, + "step": 4577 + }, + { + "epoch": 0.93, + "learning_rate": 1.3101190998155155e-05, + "loss": 1.7939, + "step": 4578 + }, + { + "epoch": 0.93, + "learning_rate": 1.302654735320552e-05, + "loss": 1.7373, + "step": 4579 + }, + { + "epoch": 0.93, + "learning_rate": 1.2952114149545724e-05, + "loss": 1.8525, + "step": 4580 + }, + { + "epoch": 0.93, + "learning_rate": 1.2877891419341426e-05, + "loss": 1.8652, + "step": 4581 + }, + { + "epoch": 0.93, + "learning_rate": 1.2803879194667245e-05, + "loss": 1.8242, + "step": 4582 + }, + { + "epoch": 0.93, + "learning_rate": 1.2730077507506987e-05, + "loss": 1.7959, + "step": 4583 + }, + { + "epoch": 0.93, + "learning_rate": 1.2656486389753418e-05, + "loss": 1.8418, + "step": 4584 + }, + { + "epoch": 0.93, + "learning_rate": 1.2583105873208266e-05, + "loss": 1.7949, + "step": 4585 + }, + { + "epoch": 0.93, + "learning_rate": 1.2509935989582332e-05, + "loss": 1.8467, + "step": 4586 + }, + { + "epoch": 0.93, + "learning_rate": 1.2436976770495268e-05, + "loss": 1.833, + "step": 4587 + }, + { + "epoch": 0.93, + "learning_rate": 1.2364228247475684e-05, + "loss": 1.8711, + "step": 4588 + }, + { + "epoch": 0.93, + "learning_rate": 1.2291690451961435e-05, + "loss": 1.8398, + "step": 4589 + }, + { + "epoch": 0.93, + "learning_rate": 1.2219363415298833e-05, + "loss": 1.7578, + "step": 4590 + }, + { + "epoch": 0.93, + "learning_rate": 1.2147247168743547e-05, + "loss": 1.7891, + "step": 4591 + }, + { + "epoch": 0.93, + "learning_rate": 1.2075341743459978e-05, + "loss": 1.8809, + "step": 4592 + }, + { + "epoch": 0.93, + "learning_rate": 1.2003647170521381e-05, + "loss": 1.8066, + "step": 4593 + }, + { + "epoch": 0.93, + "learning_rate": 1.1932163480909864e-05, + "loss": 1.8545, + "step": 4594 + }, + { + "epoch": 0.93, + "learning_rate": 1.1860890705516547e-05, + "loss": 1.8818, + "step": 4595 + }, + { + "epoch": 0.93, + "learning_rate": 1.1789828875141351e-05, + "loss": 1.8506, + "step": 4596 + }, + { + "epoch": 0.93, + "learning_rate": 1.1718978020492987e-05, + "loss": 1.957, + "step": 4597 + }, + { + "epoch": 0.93, + "learning_rate": 1.1648338172188966e-05, + "loss": 1.79, + "step": 4598 + }, + { + "epoch": 0.93, + "learning_rate": 1.1577909360755812e-05, + "loss": 1.7197, + "step": 4599 + }, + { + "epoch": 0.93, + "learning_rate": 1.150769161662868e-05, + "loss": 1.8135, + "step": 4600 + }, + { + "epoch": 0.93, + "learning_rate": 1.1437684970151407e-05, + "loss": 1.7754, + "step": 4601 + }, + { + "epoch": 0.93, + "learning_rate": 1.1367889451576962e-05, + "loss": 1.8789, + "step": 4602 + }, + { + "epoch": 0.93, + "learning_rate": 1.1298305091066662e-05, + "loss": 1.8525, + "step": 4603 + }, + { + "epoch": 0.93, + "learning_rate": 1.12289319186909e-05, + "loss": 1.8555, + "step": 4604 + }, + { + "epoch": 0.93, + "learning_rate": 1.1159769964428745e-05, + "loss": 1.8369, + "step": 4605 + }, + { + "epoch": 0.93, + "learning_rate": 1.1090819258167795e-05, + "loss": 1.8428, + "step": 4606 + }, + { + "epoch": 0.94, + "learning_rate": 1.1022079829704435e-05, + "loss": 1.8467, + "step": 4607 + }, + { + "epoch": 0.94, + "learning_rate": 1.0953551708744014e-05, + "loss": 1.7627, + "step": 4608 + }, + { + "epoch": 0.94, + "learning_rate": 1.0885234924900234e-05, + "loss": 1.835, + "step": 4609 + }, + { + "epoch": 0.94, + "learning_rate": 1.0817129507695534e-05, + "loss": 1.8711, + "step": 4610 + }, + { + "epoch": 0.94, + "learning_rate": 1.0749235486561149e-05, + "loss": 1.7959, + "step": 4611 + }, + { + "epoch": 0.94, + "learning_rate": 1.0681552890836943e-05, + "loss": 1.8857, + "step": 4612 + }, + { + "epoch": 0.94, + "learning_rate": 1.0614081749771187e-05, + "loss": 1.8281, + "step": 4613 + }, + { + "epoch": 0.94, + "learning_rate": 1.0546822092521113e-05, + "loss": 1.9248, + "step": 4614 + }, + { + "epoch": 0.94, + "learning_rate": 1.0479773948152305e-05, + "loss": 1.8369, + "step": 4615 + }, + { + "epoch": 0.94, + "learning_rate": 1.0412937345639029e-05, + "loss": 1.791, + "step": 4616 + }, + { + "epoch": 0.94, + "learning_rate": 1.0346312313864126e-05, + "loss": 1.8838, + "step": 4617 + }, + { + "epoch": 0.94, + "learning_rate": 1.027989888161901e-05, + "loss": 1.8887, + "step": 4618 + }, + { + "epoch": 0.94, + "learning_rate": 1.0213697077603722e-05, + "loss": 1.7959, + "step": 4619 + }, + { + "epoch": 0.94, + "learning_rate": 1.0147706930426715e-05, + "loss": 1.8887, + "step": 4620 + }, + { + "epoch": 0.94, + "learning_rate": 1.0081928468605117e-05, + "loss": 1.8408, + "step": 4621 + }, + { + "epoch": 0.94, + "learning_rate": 1.0016361720564472e-05, + "loss": 1.9463, + "step": 4622 + }, + { + "epoch": 0.94, + "learning_rate": 9.951006714638833e-06, + "loss": 1.8721, + "step": 4623 + }, + { + "epoch": 0.94, + "learning_rate": 9.88586347907089e-06, + "loss": 1.8877, + "step": 4624 + }, + { + "epoch": 0.94, + "learning_rate": 9.820932042011622e-06, + "loss": 1.8418, + "step": 4625 + }, + { + "epoch": 0.94, + "learning_rate": 9.75621243152064e-06, + "loss": 1.8154, + "step": 4626 + }, + { + "epoch": 0.94, + "learning_rate": 9.691704675565904e-06, + "loss": 1.8623, + "step": 4627 + }, + { + "epoch": 0.94, + "learning_rate": 9.627408802024007e-06, + "loss": 1.8125, + "step": 4628 + }, + { + "epoch": 0.94, + "learning_rate": 9.56332483867961e-06, + "loss": 1.9355, + "step": 4629 + }, + { + "epoch": 0.94, + "learning_rate": 9.499452813226283e-06, + "loss": 1.8809, + "step": 4630 + }, + { + "epoch": 0.94, + "learning_rate": 9.43579275326556e-06, + "loss": 1.7764, + "step": 4631 + }, + { + "epoch": 0.94, + "learning_rate": 9.372344686307655e-06, + "loss": 1.8291, + "step": 4632 + }, + { + "epoch": 0.94, + "learning_rate": 9.309108639771136e-06, + "loss": 1.9531, + "step": 4633 + }, + { + "epoch": 0.94, + "learning_rate": 9.246084640982866e-06, + "loss": 1.9414, + "step": 4634 + }, + { + "epoch": 0.94, + "learning_rate": 9.183272717178059e-06, + "loss": 1.8535, + "step": 4635 + }, + { + "epoch": 0.94, + "learning_rate": 9.120672895500393e-06, + "loss": 1.8516, + "step": 4636 + }, + { + "epoch": 0.94, + "learning_rate": 9.058285203001837e-06, + "loss": 1.8691, + "step": 4637 + }, + { + "epoch": 0.94, + "learning_rate": 8.99610966664266e-06, + "loss": 1.832, + "step": 4638 + }, + { + "epoch": 0.94, + "learning_rate": 8.934146313291425e-06, + "loss": 1.8379, + "step": 4639 + }, + { + "epoch": 0.94, + "learning_rate": 8.872395169725156e-06, + "loss": 1.834, + "step": 4640 + }, + { + "epoch": 0.94, + "learning_rate": 8.810856262629064e-06, + "loss": 1.7988, + "step": 4641 + }, + { + "epoch": 0.94, + "learning_rate": 8.749529618596485e-06, + "loss": 1.8643, + "step": 4642 + }, + { + "epoch": 0.94, + "learning_rate": 8.68841526412939e-06, + "loss": 1.7891, + "step": 4643 + }, + { + "epoch": 0.94, + "learning_rate": 8.627513225637651e-06, + "loss": 1.9717, + "step": 4644 + }, + { + "epoch": 0.94, + "learning_rate": 8.56682352943966e-06, + "loss": 1.9082, + "step": 4645 + }, + { + "epoch": 0.94, + "learning_rate": 8.506346201761828e-06, + "loss": 1.7881, + "step": 4646 + }, + { + "epoch": 0.94, + "learning_rate": 8.44608126873897e-06, + "loss": 1.8564, + "step": 4647 + }, + { + "epoch": 0.94, + "learning_rate": 8.386028756414089e-06, + "loss": 1.7744, + "step": 4648 + }, + { + "epoch": 0.94, + "learning_rate": 8.326188690738257e-06, + "loss": 1.877, + "step": 4649 + }, + { + "epoch": 0.94, + "learning_rate": 8.266561097570902e-06, + "loss": 1.8057, + "step": 4650 + }, + { + "epoch": 0.94, + "learning_rate": 8.207146002679522e-06, + "loss": 1.79, + "step": 4651 + }, + { + "epoch": 0.94, + "learning_rate": 8.147943431739801e-06, + "loss": 1.7832, + "step": 4652 + }, + { + "epoch": 0.94, + "learning_rate": 8.088953410335609e-06, + "loss": 1.8828, + "step": 4653 + }, + { + "epoch": 0.94, + "learning_rate": 8.030175963959108e-06, + "loss": 1.8604, + "step": 4654 + }, + { + "epoch": 0.94, + "learning_rate": 7.971611118010258e-06, + "loss": 1.8301, + "step": 4655 + }, + { + "epoch": 0.94, + "learning_rate": 7.913258897797481e-06, + "loss": 1.7783, + "step": 4656 + }, + { + "epoch": 0.95, + "learning_rate": 7.855119328537108e-06, + "loss": 1.918, + "step": 4657 + }, + { + "epoch": 0.95, + "learning_rate": 7.797192435353651e-06, + "loss": 1.8848, + "step": 4658 + }, + { + "epoch": 0.95, + "learning_rate": 7.739478243279696e-06, + "loss": 1.8721, + "step": 4659 + }, + { + "epoch": 0.95, + "learning_rate": 7.681976777255962e-06, + "loss": 1.7979, + "step": 4660 + }, + { + "epoch": 0.95, + "learning_rate": 7.624688062131124e-06, + "loss": 1.8096, + "step": 4661 + }, + { + "epoch": 0.95, + "learning_rate": 7.5676121226621594e-06, + "loss": 1.8076, + "step": 4662 + }, + { + "epoch": 0.95, + "learning_rate": 7.5107489835137825e-06, + "loss": 1.8145, + "step": 4663 + }, + { + "epoch": 0.95, + "learning_rate": 7.454098669258891e-06, + "loss": 1.8516, + "step": 4664 + }, + { + "epoch": 0.95, + "learning_rate": 7.397661204378514e-06, + "loss": 1.8223, + "step": 4665 + }, + { + "epoch": 0.95, + "learning_rate": 7.341436613261532e-06, + "loss": 1.8662, + "step": 4666 + }, + { + "epoch": 0.95, + "learning_rate": 7.285424920204953e-06, + "loss": 1.8564, + "step": 4667 + }, + { + "epoch": 0.95, + "learning_rate": 7.229626149413693e-06, + "loss": 1.9404, + "step": 4668 + }, + { + "epoch": 0.95, + "learning_rate": 7.174040325000797e-06, + "loss": 1.8213, + "step": 4669 + }, + { + "epoch": 0.95, + "learning_rate": 7.118667470987106e-06, + "loss": 1.8037, + "step": 4670 + }, + { + "epoch": 0.95, + "learning_rate": 7.063507611301423e-06, + "loss": 1.875, + "step": 4671 + }, + { + "epoch": 0.95, + "learning_rate": 7.008560769780792e-06, + "loss": 1.8184, + "step": 4672 + }, + { + "epoch": 0.95, + "learning_rate": 6.953826970169886e-06, + "loss": 1.8447, + "step": 4673 + }, + { + "epoch": 0.95, + "learning_rate": 6.8993062361213946e-06, + "loss": 1.7588, + "step": 4674 + }, + { + "epoch": 0.95, + "learning_rate": 6.844998591196139e-06, + "loss": 1.8037, + "step": 4675 + }, + { + "epoch": 0.95, + "learning_rate": 6.790904058862568e-06, + "loss": 1.8125, + "step": 4676 + }, + { + "epoch": 0.95, + "learning_rate": 6.737022662497094e-06, + "loss": 1.7773, + "step": 4677 + }, + { + "epoch": 0.95, + "learning_rate": 6.683354425384259e-06, + "loss": 1.8926, + "step": 4678 + }, + { + "epoch": 0.95, + "learning_rate": 6.629899370716231e-06, + "loss": 1.7744, + "step": 4679 + }, + { + "epoch": 0.95, + "learning_rate": 6.576657521593088e-06, + "loss": 1.8984, + "step": 4680 + }, + { + "epoch": 0.95, + "learning_rate": 6.523628901022927e-06, + "loss": 1.7324, + "step": 4681 + }, + { + "epoch": 0.95, + "learning_rate": 6.470813531921527e-06, + "loss": 1.8857, + "step": 4682 + }, + { + "epoch": 0.95, + "learning_rate": 6.418211437112631e-06, + "loss": 1.8887, + "step": 4683 + }, + { + "epoch": 0.95, + "learning_rate": 6.365822639327723e-06, + "loss": 1.8584, + "step": 4684 + }, + { + "epoch": 0.95, + "learning_rate": 6.313647161206193e-06, + "loss": 1.9355, + "step": 4685 + }, + { + "epoch": 0.95, + "learning_rate": 6.261685025295283e-06, + "loss": 1.8711, + "step": 4686 + }, + { + "epoch": 0.95, + "learning_rate": 6.209936254049808e-06, + "loss": 1.7725, + "step": 4687 + }, + { + "epoch": 0.95, + "learning_rate": 6.158400869832604e-06, + "loss": 1.8506, + "step": 4688 + }, + { + "epoch": 0.95, + "learning_rate": 6.107078894914353e-06, + "loss": 1.8721, + "step": 4689 + }, + { + "epoch": 0.95, + "learning_rate": 6.055970351473261e-06, + "loss": 1.7637, + "step": 4690 + }, + { + "epoch": 0.95, + "learning_rate": 6.005075261595494e-06, + "loss": 1.8223, + "step": 4691 + }, + { + "epoch": 0.95, + "learning_rate": 5.954393647274958e-06, + "loss": 1.8936, + "step": 4692 + }, + { + "epoch": 0.95, + "learning_rate": 5.903925530413135e-06, + "loss": 1.7568, + "step": 4693 + }, + { + "epoch": 0.95, + "learning_rate": 5.853670932819466e-06, + "loss": 1.9336, + "step": 4694 + }, + { + "epoch": 0.95, + "learning_rate": 5.803629876211025e-06, + "loss": 1.8672, + "step": 4695 + }, + { + "epoch": 0.95, + "learning_rate": 5.753802382212625e-06, + "loss": 1.8389, + "step": 4696 + }, + { + "epoch": 0.95, + "learning_rate": 5.7041884723567636e-06, + "loss": 1.8633, + "step": 4697 + }, + { + "epoch": 0.95, + "learning_rate": 5.65478816808368e-06, + "loss": 1.8848, + "step": 4698 + }, + { + "epoch": 0.95, + "learning_rate": 5.605601490741241e-06, + "loss": 1.8057, + "step": 4699 + }, + { + "epoch": 0.95, + "learning_rate": 5.556628461585001e-06, + "loss": 1.8154, + "step": 4700 + }, + { + "epoch": 0.95, + "learning_rate": 5.507869101778307e-06, + "loss": 1.7988, + "step": 4701 + }, + { + "epoch": 0.95, + "learning_rate": 5.459323432392083e-06, + "loss": 1.9014, + "step": 4702 + }, + { + "epoch": 0.95, + "learning_rate": 5.410991474404825e-06, + "loss": 1.9053, + "step": 4703 + }, + { + "epoch": 0.95, + "learning_rate": 5.362873248702827e-06, + "loss": 1.792, + "step": 4704 + }, + { + "epoch": 0.95, + "learning_rate": 5.314968776080009e-06, + "loss": 1.8252, + "step": 4705 + }, + { + "epoch": 0.96, + "learning_rate": 5.267278077237758e-06, + "loss": 1.8076, + "step": 4706 + }, + { + "epoch": 0.96, + "learning_rate": 5.219801172785255e-06, + "loss": 1.8975, + "step": 4707 + }, + { + "epoch": 0.96, + "learning_rate": 5.172538083239198e-06, + "loss": 1.8838, + "step": 4708 + }, + { + "epoch": 0.96, + "learning_rate": 5.125488829023916e-06, + "loss": 1.7305, + "step": 4709 + }, + { + "epoch": 0.96, + "learning_rate": 5.078653430471425e-06, + "loss": 1.8623, + "step": 4710 + }, + { + "epoch": 0.96, + "learning_rate": 5.032031907821088e-06, + "loss": 1.9043, + "step": 4711 + }, + { + "epoch": 0.96, + "learning_rate": 4.985624281220014e-06, + "loss": 1.8066, + "step": 4712 + }, + { + "epoch": 0.96, + "learning_rate": 4.939430570722936e-06, + "loss": 1.6982, + "step": 4713 + }, + { + "epoch": 0.96, + "learning_rate": 4.893450796291998e-06, + "loss": 1.8154, + "step": 4714 + }, + { + "epoch": 0.96, + "learning_rate": 4.8476849777969735e-06, + "loss": 1.8955, + "step": 4715 + }, + { + "epoch": 0.96, + "learning_rate": 4.802133135015096e-06, + "loss": 1.7881, + "step": 4716 + }, + { + "epoch": 0.96, + "learning_rate": 4.756795287631288e-06, + "loss": 1.8086, + "step": 4717 + }, + { + "epoch": 0.96, + "learning_rate": 4.71167145523782e-06, + "loss": 1.8828, + "step": 4718 + }, + { + "epoch": 0.96, + "learning_rate": 4.666761657334595e-06, + "loss": 1.835, + "step": 4719 + }, + { + "epoch": 0.96, + "learning_rate": 4.622065913329032e-06, + "loss": 1.9336, + "step": 4720 + }, + { + "epoch": 0.96, + "learning_rate": 4.5775842425359595e-06, + "loss": 1.9033, + "step": 4721 + }, + { + "epoch": 0.96, + "learning_rate": 4.533316664177667e-06, + "loss": 1.8701, + "step": 4722 + }, + { + "epoch": 0.96, + "learning_rate": 4.48926319738413e-06, + "loss": 1.8154, + "step": 4723 + }, + { + "epoch": 0.96, + "learning_rate": 4.445423861192566e-06, + "loss": 1.7764, + "step": 4724 + }, + { + "epoch": 0.96, + "learning_rate": 4.4017986745478186e-06, + "loss": 1.8652, + "step": 4725 + }, + { + "epoch": 0.96, + "learning_rate": 4.358387656302143e-06, + "loss": 1.7852, + "step": 4726 + }, + { + "epoch": 0.96, + "learning_rate": 4.315190825215143e-06, + "loss": 1.7578, + "step": 4727 + }, + { + "epoch": 0.96, + "learning_rate": 4.272208199953997e-06, + "loss": 1.7891, + "step": 4728 + }, + { + "epoch": 0.96, + "learning_rate": 4.229439799093293e-06, + "loss": 1.8408, + "step": 4729 + }, + { + "epoch": 0.96, + "learning_rate": 4.186885641115023e-06, + "loss": 1.7646, + "step": 4730 + }, + { + "epoch": 0.96, + "learning_rate": 4.144545744408535e-06, + "loss": 1.9521, + "step": 4731 + }, + { + "epoch": 0.96, + "learning_rate": 4.1024201272706894e-06, + "loss": 1.7705, + "step": 4732 + }, + { + "epoch": 0.96, + "learning_rate": 4.06050880790565e-06, + "loss": 1.835, + "step": 4733 + }, + { + "epoch": 0.96, + "learning_rate": 4.018811804425093e-06, + "loss": 1.8379, + "step": 4734 + }, + { + "epoch": 0.96, + "learning_rate": 3.97732913484794e-06, + "loss": 1.8184, + "step": 4735 + }, + { + "epoch": 0.96, + "learning_rate": 3.936060817100628e-06, + "loss": 1.8408, + "step": 4736 + }, + { + "epoch": 0.96, + "learning_rate": 3.8950068690168375e-06, + "loss": 1.8594, + "step": 4737 + }, + { + "epoch": 0.96, + "learning_rate": 3.8541673083377085e-06, + "loss": 1.8457, + "step": 4738 + }, + { + "epoch": 0.96, + "learning_rate": 3.813542152711735e-06, + "loss": 1.8516, + "step": 4739 + }, + { + "epoch": 0.96, + "learning_rate": 3.773131419694653e-06, + "loss": 1.8096, + "step": 4740 + }, + { + "epoch": 0.96, + "learning_rate": 3.732935126749604e-06, + "loss": 1.7656, + "step": 4741 + }, + { + "epoch": 0.96, + "learning_rate": 3.692953291247081e-06, + "loss": 1.8389, + "step": 4742 + }, + { + "epoch": 0.96, + "learning_rate": 3.6531859304649305e-06, + "loss": 1.9375, + "step": 4743 + }, + { + "epoch": 0.96, + "learning_rate": 3.613633061588184e-06, + "loss": 1.874, + "step": 4744 + }, + { + "epoch": 0.96, + "learning_rate": 3.5742947017092795e-06, + "loss": 1.79, + "step": 4745 + }, + { + "epoch": 0.96, + "learning_rate": 3.535170867828008e-06, + "loss": 1.7988, + "step": 4746 + }, + { + "epoch": 0.96, + "learning_rate": 3.496261576851345e-06, + "loss": 1.751, + "step": 4747 + }, + { + "epoch": 0.96, + "learning_rate": 3.457566845593618e-06, + "loss": 1.8193, + "step": 4748 + }, + { + "epoch": 0.96, + "learning_rate": 3.419086690776396e-06, + "loss": 1.8418, + "step": 4749 + }, + { + "epoch": 0.96, + "learning_rate": 3.3808211290284885e-06, + "loss": 1.957, + "step": 4750 + }, + { + "epoch": 0.96, + "learning_rate": 3.342770176886112e-06, + "loss": 1.8799, + "step": 4751 + }, + { + "epoch": 0.96, + "learning_rate": 3.304933850792613e-06, + "loss": 1.9141, + "step": 4752 + }, + { + "epoch": 0.96, + "learning_rate": 3.2673121670986906e-06, + "loss": 1.7852, + "step": 4753 + }, + { + "epoch": 0.96, + "learning_rate": 3.2299051420620617e-06, + "loss": 1.7363, + "step": 4754 + }, + { + "epoch": 0.97, + "learning_rate": 3.1927127918479624e-06, + "loss": 1.7734, + "step": 4755 + }, + { + "epoch": 0.97, + "learning_rate": 3.1557351325287587e-06, + "loss": 1.957, + "step": 4756 + }, + { + "epoch": 0.97, + "learning_rate": 3.1189721800839456e-06, + "loss": 1.8057, + "step": 4757 + }, + { + "epoch": 0.97, + "learning_rate": 3.08242395040037e-06, + "loss": 1.8975, + "step": 4758 + }, + { + "epoch": 0.97, + "learning_rate": 3.0460904592719553e-06, + "loss": 1.8604, + "step": 4759 + }, + { + "epoch": 0.97, + "learning_rate": 3.0099717223999733e-06, + "loss": 1.8301, + "step": 4760 + }, + { + "epoch": 0.97, + "learning_rate": 2.9740677553928285e-06, + "loss": 1.9141, + "step": 4761 + }, + { + "epoch": 0.97, + "learning_rate": 2.9383785737659984e-06, + "loss": 1.8643, + "step": 4762 + }, + { + "epoch": 0.97, + "learning_rate": 2.9029041929423684e-06, + "loss": 1.876, + "step": 4763 + }, + { + "epoch": 0.97, + "learning_rate": 2.8676446282517864e-06, + "loss": 1.9043, + "step": 4764 + }, + { + "epoch": 0.97, + "learning_rate": 2.8325998949314534e-06, + "loss": 1.9541, + "step": 4765 + }, + { + "epoch": 0.97, + "learning_rate": 2.797770008125533e-06, + "loss": 1.8877, + "step": 4766 + }, + { + "epoch": 0.97, + "learning_rate": 2.763154982885541e-06, + "loss": 1.7559, + "step": 4767 + }, + { + "epoch": 0.97, + "learning_rate": 2.7287548341700663e-06, + "loss": 1.793, + "step": 4768 + }, + { + "epoch": 0.97, + "learning_rate": 2.694569576844774e-06, + "loss": 1.8936, + "step": 4769 + }, + { + "epoch": 0.97, + "learning_rate": 2.660599225682514e-06, + "loss": 1.8086, + "step": 4770 + }, + { + "epoch": 0.97, + "learning_rate": 2.6268437953633205e-06, + "loss": 1.8281, + "step": 4771 + }, + { + "epoch": 0.97, + "learning_rate": 2.5933033004743032e-06, + "loss": 1.7832, + "step": 4772 + }, + { + "epoch": 0.97, + "learning_rate": 2.559977755509646e-06, + "loss": 1.8291, + "step": 4773 + }, + { + "epoch": 0.97, + "learning_rate": 2.5268671748707173e-06, + "loss": 1.8955, + "step": 4774 + }, + { + "epoch": 0.97, + "learning_rate": 2.4939715728659607e-06, + "loss": 1.8096, + "step": 4775 + }, + { + "epoch": 0.97, + "learning_rate": 2.4612909637109495e-06, + "loss": 1.7363, + "step": 4776 + }, + { + "epoch": 0.97, + "learning_rate": 2.4288253615282753e-06, + "loss": 1.7256, + "step": 4777 + }, + { + "epoch": 0.97, + "learning_rate": 2.3965747803477155e-06, + "loss": 1.8564, + "step": 4778 + }, + { + "epoch": 0.97, + "learning_rate": 2.364539234106011e-06, + "loss": 1.6895, + "step": 4779 + }, + { + "epoch": 0.97, + "learning_rate": 2.332718736647088e-06, + "loss": 1.8682, + "step": 4780 + }, + { + "epoch": 0.97, + "learning_rate": 2.301113301721891e-06, + "loss": 1.876, + "step": 4781 + }, + { + "epoch": 0.97, + "learning_rate": 2.2697229429883837e-06, + "loss": 1.8271, + "step": 4782 + }, + { + "epoch": 0.97, + "learning_rate": 2.238547674011715e-06, + "loss": 1.8115, + "step": 4783 + }, + { + "epoch": 0.97, + "learning_rate": 2.2075875082639417e-06, + "loss": 1.8242, + "step": 4784 + }, + { + "epoch": 0.97, + "learning_rate": 2.1768424591241954e-06, + "loss": 1.7559, + "step": 4785 + }, + { + "epoch": 0.97, + "learning_rate": 2.1463125398787365e-06, + "loss": 1.7783, + "step": 4786 + }, + { + "epoch": 0.97, + "learning_rate": 2.115997763720845e-06, + "loss": 1.8848, + "step": 4787 + }, + { + "epoch": 0.97, + "learning_rate": 2.085898143750653e-06, + "loss": 1.7803, + "step": 4788 + }, + { + "epoch": 0.97, + "learning_rate": 2.0560136929755893e-06, + "loss": 1.793, + "step": 4789 + }, + { + "epoch": 0.97, + "learning_rate": 2.026344424309823e-06, + "loss": 1.876, + "step": 4790 + }, + { + "epoch": 0.97, + "learning_rate": 1.996890350574765e-06, + "loss": 1.8701, + "step": 4791 + }, + { + "epoch": 0.97, + "learning_rate": 1.967651484498734e-06, + "loss": 1.7773, + "step": 4792 + }, + { + "epoch": 0.97, + "learning_rate": 1.9386278387169553e-06, + "loss": 1.7881, + "step": 4793 + }, + { + "epoch": 0.97, + "learning_rate": 1.9098194257718414e-06, + "loss": 1.7402, + "step": 4794 + }, + { + "epoch": 0.97, + "learning_rate": 1.881226258112656e-06, + "loss": 1.9521, + "step": 4795 + }, + { + "epoch": 0.97, + "learning_rate": 1.8528483480956814e-06, + "loss": 1.8262, + "step": 4796 + }, + { + "epoch": 0.97, + "learning_rate": 1.824685707984164e-06, + "loss": 1.8301, + "step": 4797 + }, + { + "epoch": 0.97, + "learning_rate": 1.7967383499483681e-06, + "loss": 1.8242, + "step": 4798 + }, + { + "epoch": 0.97, + "learning_rate": 1.7690062860654666e-06, + "loss": 1.8174, + "step": 4799 + }, + { + "epoch": 0.97, + "learning_rate": 1.7414895283197063e-06, + "loss": 1.8438, + "step": 4800 + }, + { + "epoch": 0.97, + "learning_rate": 1.714188088602131e-06, + "loss": 1.8818, + "step": 4801 + }, + { + "epoch": 0.97, + "learning_rate": 1.6871019787108587e-06, + "loss": 1.8135, + "step": 4802 + }, + { + "epoch": 0.97, + "learning_rate": 1.6602312103508598e-06, + "loss": 1.8691, + "step": 4803 + }, + { + "epoch": 0.98, + "learning_rate": 1.633575795134179e-06, + "loss": 1.7725, + "step": 4804 + }, + { + "epoch": 0.98, + "learning_rate": 1.6071357445796576e-06, + "loss": 1.916, + "step": 4805 + }, + { + "epoch": 0.98, + "learning_rate": 1.5809110701131557e-06, + "loss": 1.9189, + "step": 4806 + }, + { + "epoch": 0.98, + "learning_rate": 1.5549017830674418e-06, + "loss": 1.9463, + "step": 4807 + }, + { + "epoch": 0.98, + "learning_rate": 1.5291078946821912e-06, + "loss": 1.8105, + "step": 4808 + }, + { + "epoch": 0.98, + "learning_rate": 1.503529416103988e-06, + "loss": 1.9365, + "step": 4809 + }, + { + "epoch": 0.98, + "learning_rate": 1.4781663583863792e-06, + "loss": 1.7656, + "step": 4810 + }, + { + "epoch": 0.98, + "learning_rate": 1.4530187324897082e-06, + "loss": 1.875, + "step": 4811 + }, + { + "epoch": 0.98, + "learning_rate": 1.4280865492814487e-06, + "loss": 1.9717, + "step": 4812 + }, + { + "epoch": 0.98, + "learning_rate": 1.4033698195357048e-06, + "loss": 1.9414, + "step": 4813 + }, + { + "epoch": 0.98, + "learning_rate": 1.3788685539335989e-06, + "loss": 1.9326, + "step": 4814 + }, + { + "epoch": 0.98, + "learning_rate": 1.3545827630632168e-06, + "loss": 1.7832, + "step": 4815 + }, + { + "epoch": 0.98, + "learning_rate": 1.3305124574193306e-06, + "loss": 1.8076, + "step": 4816 + }, + { + "epoch": 0.98, + "learning_rate": 1.3066576474038417e-06, + "loss": 1.876, + "step": 4817 + }, + { + "epoch": 0.98, + "learning_rate": 1.2830183433252818e-06, + "loss": 1.8877, + "step": 4818 + }, + { + "epoch": 0.98, + "learning_rate": 1.2595945553992573e-06, + "loss": 1.8369, + "step": 4819 + }, + { + "epoch": 0.98, + "learning_rate": 1.2363862937481152e-06, + "loss": 1.9229, + "step": 4820 + }, + { + "epoch": 0.98, + "learning_rate": 1.2133935684010556e-06, + "loss": 1.9248, + "step": 4821 + }, + { + "epoch": 0.98, + "learning_rate": 1.1906163892942967e-06, + "loss": 1.8359, + "step": 4822 + }, + { + "epoch": 0.98, + "learning_rate": 1.1680547662706875e-06, + "loss": 1.8047, + "step": 4823 + }, + { + "epoch": 0.98, + "learning_rate": 1.14570870908004e-06, + "loss": 1.8691, + "step": 4824 + }, + { + "epoch": 0.98, + "learning_rate": 1.123578227379074e-06, + "loss": 1.8223, + "step": 4825 + }, + { + "epoch": 0.98, + "learning_rate": 1.1016633307312507e-06, + "loss": 1.7725, + "step": 4826 + }, + { + "epoch": 0.98, + "learning_rate": 1.0799640286068834e-06, + "loss": 1.8799, + "step": 4827 + }, + { + "epoch": 0.98, + "learning_rate": 1.0584803303831381e-06, + "loss": 1.9551, + "step": 4828 + }, + { + "epoch": 0.98, + "learning_rate": 1.0372122453440324e-06, + "loss": 1.9004, + "step": 4829 + }, + { + "epoch": 0.98, + "learning_rate": 1.016159782680326e-06, + "loss": 1.8145, + "step": 4830 + }, + { + "epoch": 0.98, + "learning_rate": 9.95322951489741e-07, + "loss": 1.8564, + "step": 4831 + }, + { + "epoch": 0.98, + "learning_rate": 9.747017607766862e-07, + "loss": 1.9453, + "step": 4832 + }, + { + "epoch": 0.98, + "learning_rate": 9.542962194524217e-07, + "loss": 1.8877, + "step": 4833 + }, + { + "epoch": 0.98, + "learning_rate": 9.341063363349501e-07, + "loss": 1.7793, + "step": 4834 + }, + { + "epoch": 0.98, + "learning_rate": 9.141321201492914e-07, + "loss": 1.7559, + "step": 4835 + }, + { + "epoch": 0.98, + "learning_rate": 8.943735795270969e-07, + "loss": 1.9229, + "step": 4836 + }, + { + "epoch": 0.98, + "learning_rate": 8.748307230067587e-07, + "loss": 1.6689, + "step": 4837 + }, + { + "epoch": 0.98, + "learning_rate": 8.555035590336324e-07, + "loss": 1.8584, + "step": 4838 + }, + { + "epoch": 0.98, + "learning_rate": 8.363920959597038e-07, + "loss": 1.8428, + "step": 4839 + }, + { + "epoch": 0.98, + "learning_rate": 8.174963420439219e-07, + "loss": 1.8496, + "step": 4840 + }, + { + "epoch": 0.98, + "learning_rate": 7.98816305451866e-07, + "loss": 1.8516, + "step": 4841 + }, + { + "epoch": 0.98, + "learning_rate": 7.803519942559123e-07, + "loss": 1.7734, + "step": 4842 + }, + { + "epoch": 0.98, + "learning_rate": 7.621034164353446e-07, + "loss": 1.8643, + "step": 4843 + }, + { + "epoch": 0.98, + "learning_rate": 7.440705798760217e-07, + "loss": 1.9531, + "step": 4844 + }, + { + "epoch": 0.98, + "learning_rate": 7.262534923707654e-07, + "loss": 1.7949, + "step": 4845 + }, + { + "epoch": 0.98, + "learning_rate": 7.086521616190278e-07, + "loss": 1.8965, + "step": 4846 + }, + { + "epoch": 0.98, + "learning_rate": 6.91266595227058e-07, + "loss": 1.8789, + "step": 4847 + }, + { + "epoch": 0.98, + "learning_rate": 6.740968007079018e-07, + "loss": 1.7422, + "step": 4848 + }, + { + "epoch": 0.98, + "learning_rate": 6.571427854813461e-07, + "loss": 1.8301, + "step": 4849 + }, + { + "epoch": 0.98, + "learning_rate": 6.404045568738637e-07, + "loss": 1.8105, + "step": 4850 + }, + { + "epoch": 0.98, + "learning_rate": 6.238821221187796e-07, + "loss": 1.874, + "step": 4851 + }, + { + "epoch": 0.98, + "learning_rate": 6.075754883561601e-07, + "loss": 1.8457, + "step": 4852 + }, + { + "epoch": 0.98, + "learning_rate": 5.914846626326464e-07, + "loss": 1.8232, + "step": 4853 + }, + { + "epoch": 0.99, + "learning_rate": 5.756096519018428e-07, + "loss": 1.8828, + "step": 4854 + }, + { + "epoch": 0.99, + "learning_rate": 5.599504630239838e-07, + "loss": 1.7539, + "step": 4855 + }, + { + "epoch": 0.99, + "learning_rate": 5.445071027659898e-07, + "loss": 1.874, + "step": 4856 + }, + { + "epoch": 0.99, + "learning_rate": 5.29279577801689e-07, + "loss": 1.6904, + "step": 4857 + }, + { + "epoch": 0.99, + "learning_rate": 5.142678947114288e-07, + "loss": 1.7881, + "step": 4858 + }, + { + "epoch": 0.99, + "learning_rate": 4.994720599824087e-07, + "loss": 1.9111, + "step": 4859 + }, + { + "epoch": 0.99, + "learning_rate": 4.848920800085143e-07, + "loss": 1.8643, + "step": 4860 + }, + { + "epoch": 0.99, + "learning_rate": 4.7052796109031683e-07, + "loss": 1.7578, + "step": 4861 + }, + { + "epoch": 0.99, + "learning_rate": 4.5637970943523953e-07, + "loss": 1.9434, + "step": 4862 + }, + { + "epoch": 0.99, + "learning_rate": 4.4244733115722525e-07, + "loss": 1.8271, + "step": 4863 + }, + { + "epoch": 0.99, + "learning_rate": 4.2873083227706887e-07, + "loss": 1.8682, + "step": 4864 + }, + { + "epoch": 0.99, + "learning_rate": 4.152302187222512e-07, + "loss": 1.7979, + "step": 4865 + }, + { + "epoch": 0.99, + "learning_rate": 4.0194549632693867e-07, + "loss": 1.9092, + "step": 4866 + }, + { + "epoch": 0.99, + "learning_rate": 3.888766708319835e-07, + "loss": 1.8604, + "step": 4867 + }, + { + "epoch": 0.99, + "learning_rate": 3.7602374788497927e-07, + "loss": 1.7676, + "step": 4868 + }, + { + "epoch": 0.99, + "learning_rate": 3.633867330402052e-07, + "loss": 1.7734, + "step": 4869 + }, + { + "epoch": 0.99, + "learning_rate": 3.509656317585708e-07, + "loss": 1.7607, + "step": 4870 + }, + { + "epoch": 0.99, + "learning_rate": 3.387604494078933e-07, + "loss": 1.8037, + "step": 4871 + }, + { + "epoch": 0.99, + "learning_rate": 3.267711912623983e-07, + "loss": 1.8232, + "step": 4872 + }, + { + "epoch": 0.99, + "learning_rate": 3.14997862503219e-07, + "loss": 1.8477, + "step": 4873 + }, + { + "epoch": 0.99, + "learning_rate": 3.034404682180081e-07, + "loss": 1.8438, + "step": 4874 + }, + { + "epoch": 0.99, + "learning_rate": 2.9209901340132574e-07, + "loss": 1.8564, + "step": 4875 + }, + { + "epoch": 0.99, + "learning_rate": 2.809735029541405e-07, + "loss": 1.7773, + "step": 4876 + }, + { + "epoch": 0.99, + "learning_rate": 2.700639416843287e-07, + "loss": 1.8408, + "step": 4877 + }, + { + "epoch": 0.99, + "learning_rate": 2.593703343063414e-07, + "loss": 1.8418, + "step": 4878 + }, + { + "epoch": 0.99, + "learning_rate": 2.488926854413154e-07, + "loss": 1.8652, + "step": 4879 + }, + { + "epoch": 0.99, + "learning_rate": 2.3863099961707323e-07, + "loss": 1.9023, + "step": 4880 + }, + { + "epoch": 0.99, + "learning_rate": 2.2858528126812327e-07, + "loss": 1.751, + "step": 4881 + }, + { + "epoch": 0.99, + "learning_rate": 2.1875553473565956e-07, + "loss": 1.8057, + "step": 4882 + }, + { + "epoch": 0.99, + "learning_rate": 2.0914176426750642e-07, + "loss": 1.917, + "step": 4883 + }, + { + "epoch": 0.99, + "learning_rate": 1.9974397401811838e-07, + "loss": 1.7793, + "step": 4884 + }, + { + "epoch": 0.99, + "learning_rate": 1.905621680487468e-07, + "loss": 1.8271, + "step": 4885 + }, + { + "epoch": 0.99, + "learning_rate": 1.8159635032716227e-07, + "loss": 1.7451, + "step": 4886 + }, + { + "epoch": 0.99, + "learning_rate": 1.7284652472787654e-07, + "loss": 1.8174, + "step": 4887 + }, + { + "epoch": 0.99, + "learning_rate": 1.6431269503208723e-07, + "loss": 1.8193, + "step": 4888 + }, + { + "epoch": 0.99, + "learning_rate": 1.5599486492756664e-07, + "loss": 1.7148, + "step": 4889 + }, + { + "epoch": 0.99, + "learning_rate": 1.4789303800882835e-07, + "loss": 1.8848, + "step": 4890 + }, + { + "epoch": 0.99, + "learning_rate": 1.4000721777701618e-07, + "loss": 1.7568, + "step": 4891 + }, + { + "epoch": 0.99, + "learning_rate": 1.3233740763990422e-07, + "loss": 1.7686, + "step": 4892 + }, + { + "epoch": 0.99, + "learning_rate": 1.248836109118967e-07, + "loss": 1.7725, + "step": 4893 + }, + { + "epoch": 0.99, + "learning_rate": 1.1764583081408375e-07, + "loss": 1.8115, + "step": 4894 + }, + { + "epoch": 0.99, + "learning_rate": 1.1062407047429668e-07, + "loss": 1.8242, + "step": 4895 + }, + { + "epoch": 0.99, + "learning_rate": 1.0381833292683052e-07, + "loss": 1.8799, + "step": 4896 + }, + { + "epoch": 0.99, + "learning_rate": 9.722862111277708e-08, + "loss": 1.8896, + "step": 4897 + }, + { + "epoch": 0.99, + "learning_rate": 9.085493787980293e-08, + "loss": 1.8711, + "step": 4898 + }, + { + "epoch": 0.99, + "learning_rate": 8.469728598220483e-08, + "loss": 1.8135, + "step": 4899 + }, + { + "epoch": 0.99, + "learning_rate": 7.875566808107637e-08, + "loss": 1.8672, + "step": 4900 + }, + { + "epoch": 0.99, + "learning_rate": 7.30300867439193e-08, + "loss": 1.8516, + "step": 4901 + }, + { + "epoch": 0.99, + "learning_rate": 6.752054444497669e-08, + "loss": 1.833, + "step": 4902 + }, + { + "epoch": 1.0, + "learning_rate": 6.22270435652883e-08, + "loss": 1.7939, + "step": 4903 + }, + { + "epoch": 1.0, + "learning_rate": 5.714958639224666e-08, + "loss": 1.7266, + "step": 4904 + }, + { + "epoch": 1.0, + "learning_rate": 5.228817512015205e-08, + "loss": 1.8477, + "step": 4905 + }, + { + "epoch": 1.0, + "learning_rate": 4.7642811849712974e-08, + "loss": 1.8848, + "step": 4906 + }, + { + "epoch": 1.0, + "learning_rate": 4.3213498588434706e-08, + "loss": 1.8584, + "step": 4907 + }, + { + "epoch": 1.0, + "learning_rate": 3.900023725039725e-08, + "loss": 1.8008, + "step": 4908 + }, + { + "epoch": 1.0, + "learning_rate": 3.500302965636637e-08, + "loss": 1.8379, + "step": 4909 + }, + { + "epoch": 1.0, + "learning_rate": 3.122187753362704e-08, + "loss": 1.7432, + "step": 4910 + }, + { + "epoch": 1.0, + "learning_rate": 2.7656782516205513e-08, + "loss": 1.8809, + "step": 4911 + }, + { + "epoch": 1.0, + "learning_rate": 2.4307746144702768e-08, + "loss": 1.8721, + "step": 4912 + }, + { + "epoch": 1.0, + "learning_rate": 2.1174769866461053e-08, + "loss": 1.8232, + "step": 4913 + }, + { + "epoch": 1.0, + "learning_rate": 1.8257855035230807e-08, + "loss": 1.6973, + "step": 4914 + }, + { + "epoch": 1.0, + "learning_rate": 1.5557002911670282e-08, + "loss": 1.8818, + "step": 4915 + }, + { + "epoch": 1.0, + "learning_rate": 1.3072214662845916e-08, + "loss": 1.8936, + "step": 4916 + }, + { + "epoch": 1.0, + "learning_rate": 1.080349136250991e-08, + "loss": 1.8418, + "step": 4917 + }, + { + "epoch": 1.0, + "learning_rate": 8.750833991155727e-09, + "loss": 1.9121, + "step": 4918 + }, + { + "epoch": 1.0, + "learning_rate": 6.914243435796053e-09, + "loss": 1.8457, + "step": 4919 + }, + { + "epoch": 1.0, + "learning_rate": 5.2937204900738165e-09, + "loss": 1.8447, + "step": 4920 + }, + { + "epoch": 1.0, + "learning_rate": 3.889265854317703e-09, + "loss": 1.748, + "step": 4921 + }, + { + "epoch": 1.0, + "learning_rate": 2.7008801353756163e-09, + "loss": 1.876, + "step": 4922 + }, + { + "epoch": 1.0, + "learning_rate": 1.7285638468922393e-09, + "loss": 1.7412, + "step": 4923 + }, + { + "epoch": 1.0, + "learning_rate": 9.723174089759645e-10, + "loss": 1.8496, + "step": 4924 + }, + { + "epoch": 1.0, + "learning_rate": 4.321411484764504e-10, + "loss": 1.8525, + "step": 4925 + }, + { + "epoch": 1.0, + "learning_rate": 1.0803529876257656e-10, + "loss": 1.8291, + "step": 4926 + }, + { + "epoch": 1.0, + "learning_rate": 0.0, + "loss": 1.7939, + "step": 4927 + }, + { + "epoch": 1.0, + "step": 4927, + "total_flos": 3.748687970375303e+18, + "train_loss": 2.0709812480972194, + "train_runtime": 81625.4527, + "train_samples_per_second": 15.45, + "train_steps_per_second": 0.06 + } + ], + "max_steps": 4927, + "num_train_epochs": 1, + "total_flos": 3.748687970375303e+18, + "trial_name": null, + "trial_params": null +}