| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 651, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 117760.0, | |
| "learning_rate": 5.000000000000001e-07, | |
| "log_odds_chosen": 0.36438828706741333, | |
| "log_odds_ratio": -0.6397662162780762, | |
| "logits/chosen": 3.8861491680145264, | |
| "logits/rejected": 5.231001853942871, | |
| "logps/chosen": -0.9861465692520142, | |
| "logps/rejected": -1.2529093027114868, | |
| "loss": 1.953, | |
| "nll_loss": 3.2415008544921875, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.04930732399225235, | |
| "rewards/margins": 0.013338141143321991, | |
| "rewards/rejected": -0.06264545768499374, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "grad_norm": 29184.0, | |
| "learning_rate": 1.0000000000000002e-06, | |
| "log_odds_chosen": 0.17107267677783966, | |
| "log_odds_ratio": -0.6301043033599854, | |
| "logits/chosen": 4.779696464538574, | |
| "logits/rejected": 5.251872539520264, | |
| "logps/chosen": -1.1045284271240234, | |
| "logps/rejected": -1.2445374727249146, | |
| "loss": 1.7108, | |
| "nll_loss": 1.8614288568496704, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.05522642284631729, | |
| "rewards/margins": 0.007000453770160675, | |
| "rewards/rejected": -0.06222687289118767, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "grad_norm": 3932160.0, | |
| "learning_rate": 1.5e-06, | |
| "log_odds_chosen": 0.478428453207016, | |
| "log_odds_ratio": -0.5682710409164429, | |
| "logits/chosen": 4.58956241607666, | |
| "logits/rejected": 5.215265274047852, | |
| "logps/chosen": -0.9884525537490845, | |
| "logps/rejected": -1.2604442834854126, | |
| "loss": 2.1071, | |
| "nll_loss": 1.525723934173584, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.04942262917757034, | |
| "rewards/margins": 0.013599586673080921, | |
| "rewards/rejected": -0.06302221864461899, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "grad_norm": 63232.0, | |
| "learning_rate": 2.0000000000000003e-06, | |
| "log_odds_chosen": 0.2763148248195648, | |
| "log_odds_ratio": -0.6428317427635193, | |
| "logits/chosen": 5.248695373535156, | |
| "logits/rejected": 5.335747718811035, | |
| "logps/chosen": -0.9019734263420105, | |
| "logps/rejected": -1.058569312095642, | |
| "loss": 1.6367, | |
| "nll_loss": 1.17539644241333, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.045098677277565, | |
| "rewards/margins": 0.007829795591533184, | |
| "rewards/rejected": -0.052928466349840164, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "grad_norm": 1802240.0, | |
| "learning_rate": 2.5e-06, | |
| "log_odds_chosen": -0.07109338045120239, | |
| "log_odds_ratio": -0.9068069458007812, | |
| "logits/chosen": 4.34699821472168, | |
| "logits/rejected": 5.148941993713379, | |
| "logps/chosen": -1.0307292938232422, | |
| "logps/rejected": -1.0001896619796753, | |
| "loss": 2.0499, | |
| "nll_loss": 2.4581282138824463, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.05153647065162659, | |
| "rewards/margins": -0.0015269846189767122, | |
| "rewards/rejected": -0.05000948905944824, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "grad_norm": 473088.0, | |
| "learning_rate": 3e-06, | |
| "log_odds_chosen": 0.7022095918655396, | |
| "log_odds_ratio": -0.47877854108810425, | |
| "logits/chosen": 5.137725830078125, | |
| "logits/rejected": 5.073107719421387, | |
| "logps/chosen": -0.7480964660644531, | |
| "logps/rejected": -1.172572374343872, | |
| "loss": 1.9116, | |
| "nll_loss": 1.2398216724395752, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -0.037404827773571014, | |
| "rewards/margins": 0.021223794668912888, | |
| "rewards/rejected": -0.058628618717193604, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 4161536.0, | |
| "learning_rate": 3.5e-06, | |
| "log_odds_chosen": -0.30822521448135376, | |
| "log_odds_ratio": -1.0616459846496582, | |
| "logits/chosen": 4.378929615020752, | |
| "logits/rejected": 5.239219665527344, | |
| "logps/chosen": -1.115562081336975, | |
| "logps/rejected": -0.8684147596359253, | |
| "loss": 2.0166, | |
| "nll_loss": 2.142368793487549, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.05577809736132622, | |
| "rewards/margins": -0.012357364408671856, | |
| "rewards/rejected": -0.043420739471912384, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "grad_norm": 211968.0, | |
| "learning_rate": 4.000000000000001e-06, | |
| "log_odds_chosen": 0.38707518577575684, | |
| "log_odds_ratio": -0.5776039361953735, | |
| "logits/chosen": 5.019408226013184, | |
| "logits/rejected": 5.3371453285217285, | |
| "logps/chosen": -0.9375723004341125, | |
| "logps/rejected": -1.1710981130599976, | |
| "loss": 1.8918, | |
| "nll_loss": 1.6274166107177734, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.046878617256879807, | |
| "rewards/margins": 0.011676294729113579, | |
| "rewards/rejected": -0.058554910123348236, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "grad_norm": 1704.0, | |
| "learning_rate": 4.5e-06, | |
| "log_odds_chosen": 0.5051761865615845, | |
| "log_odds_ratio": -0.5127500295639038, | |
| "logits/chosen": 4.478859901428223, | |
| "logits/rejected": 4.748915672302246, | |
| "logps/chosen": -0.8121053576469421, | |
| "logps/rejected": -1.1007237434387207, | |
| "loss": 1.8015, | |
| "nll_loss": 1.57842218875885, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -0.04060526192188263, | |
| "rewards/margins": 0.014430919662117958, | |
| "rewards/rejected": -0.055036187171936035, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "grad_norm": 8.9375, | |
| "learning_rate": 5e-06, | |
| "log_odds_chosen": 1.037414789199829, | |
| "log_odds_ratio": -0.3248421549797058, | |
| "logits/chosen": 4.653676509857178, | |
| "logits/rejected": 5.350204944610596, | |
| "logps/chosen": -0.6695261001586914, | |
| "logps/rejected": -1.300445795059204, | |
| "loss": 0.9356, | |
| "nll_loss": 0.7057152986526489, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.03347630053758621, | |
| "rewards/margins": 0.031545985490083694, | |
| "rewards/rejected": -0.0650222972035408, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "grad_norm": 2.8125, | |
| "learning_rate": 4.767312946227961e-06, | |
| "log_odds_chosen": 0.6677854061126709, | |
| "log_odds_ratio": -0.5610898733139038, | |
| "logits/chosen": 4.671368598937988, | |
| "logits/rejected": 5.119524002075195, | |
| "logps/chosen": -0.8684868812561035, | |
| "logps/rejected": -1.2020485401153564, | |
| "loss": 0.8288, | |
| "nll_loss": 0.9947482347488403, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.043424345552921295, | |
| "rewards/margins": 0.01667807623744011, | |
| "rewards/rejected": -0.06010241433978081, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "grad_norm": 1.9921875, | |
| "learning_rate": 4.564354645876385e-06, | |
| "log_odds_chosen": 0.5597886443138123, | |
| "log_odds_ratio": -0.5193617343902588, | |
| "logits/chosen": 5.5248026847839355, | |
| "logits/rejected": 6.067958354949951, | |
| "logps/chosen": -0.9015194773674011, | |
| "logps/rejected": -1.2406196594238281, | |
| "loss": 0.7451, | |
| "nll_loss": 0.8342186212539673, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.045075975358486176, | |
| "rewards/margins": 0.01695500686764717, | |
| "rewards/rejected": -0.06203098222613335, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 2.875, | |
| "learning_rate": 4.385290096535147e-06, | |
| "log_odds_chosen": 0.26957136392593384, | |
| "log_odds_ratio": -0.7732787728309631, | |
| "logits/chosen": 4.8973588943481445, | |
| "logits/rejected": 5.552582263946533, | |
| "logps/chosen": -0.877202033996582, | |
| "logps/rejected": -0.9518612623214722, | |
| "loss": 0.7319, | |
| "nll_loss": 0.6940464377403259, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.04386010393500328, | |
| "rewards/margins": 0.0037329583428800106, | |
| "rewards/rejected": -0.04759306460618973, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "grad_norm": 2.5, | |
| "learning_rate": 4.2257712736425835e-06, | |
| "log_odds_chosen": 0.7680839896202087, | |
| "log_odds_ratio": -0.5321913957595825, | |
| "logits/chosen": 5.471996307373047, | |
| "logits/rejected": 5.644137382507324, | |
| "logps/chosen": -0.6714180111885071, | |
| "logps/rejected": -0.9587985277175903, | |
| "loss": 0.732, | |
| "nll_loss": 0.6440631151199341, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.033570900559425354, | |
| "rewards/margins": 0.014369020238518715, | |
| "rewards/rejected": -0.047939930111169815, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "grad_norm": 2.375, | |
| "learning_rate": 4.082482904638631e-06, | |
| "log_odds_chosen": 0.5068908929824829, | |
| "log_odds_ratio": -0.604145884513855, | |
| "logits/chosen": 5.474297523498535, | |
| "logits/rejected": 5.376832485198975, | |
| "logps/chosen": -0.8357957005500793, | |
| "logps/rejected": -1.0136160850524902, | |
| "loss": 0.706, | |
| "nll_loss": 0.6737378835678101, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.04178978502750397, | |
| "rewards/margins": 0.00889101903885603, | |
| "rewards/rejected": -0.05068080872297287, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "grad_norm": 2.015625, | |
| "learning_rate": 3.952847075210474e-06, | |
| "log_odds_chosen": 0.615983784198761, | |
| "log_odds_ratio": -0.4876289963722229, | |
| "logits/chosen": 5.473410129547119, | |
| "logits/rejected": 6.06318998336792, | |
| "logps/chosen": -0.9676389694213867, | |
| "logps/rejected": -1.349385142326355, | |
| "loss": 0.6996, | |
| "nll_loss": 0.6852242350578308, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -0.048381954431533813, | |
| "rewards/margins": 0.019087309017777443, | |
| "rewards/rejected": -0.06746925413608551, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "grad_norm": 2.171875, | |
| "learning_rate": 3.834824944236852e-06, | |
| "log_odds_chosen": 0.4551977515220642, | |
| "log_odds_ratio": -0.5428072214126587, | |
| "logits/chosen": 4.785284042358398, | |
| "logits/rejected": 6.005092620849609, | |
| "logps/chosen": -0.7350739240646362, | |
| "logps/rejected": -1.0496256351470947, | |
| "loss": 0.6959, | |
| "nll_loss": 0.5339438319206238, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.03675369173288345, | |
| "rewards/margins": 0.015727588906884193, | |
| "rewards/rejected": -0.052481282502412796, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "grad_norm": 2.015625, | |
| "learning_rate": 3.72677996249965e-06, | |
| "log_odds_chosen": 0.5587902665138245, | |
| "log_odds_ratio": -0.6063727140426636, | |
| "logits/chosen": 4.6595892906188965, | |
| "logits/rejected": 5.4700422286987305, | |
| "logps/chosen": -0.7482207417488098, | |
| "logps/rejected": -0.9887701272964478, | |
| "loss": 0.7233, | |
| "nll_loss": 0.5874465703964233, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.03741103783249855, | |
| "rewards/margins": 0.012027469463646412, | |
| "rewards/rejected": -0.04943850636482239, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 2.0625, | |
| "learning_rate": 3.6273812505500587e-06, | |
| "log_odds_chosen": 0.9965683817863464, | |
| "log_odds_ratio": -0.4162277281284332, | |
| "logits/chosen": 5.304540157318115, | |
| "logits/rejected": 5.486930847167969, | |
| "logps/chosen": -0.7579169869422913, | |
| "logps/rejected": -1.1843591928482056, | |
| "loss": 0.7298, | |
| "nll_loss": 0.6787526607513428, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -0.03789585083723068, | |
| "rewards/margins": 0.021322116255760193, | |
| "rewards/rejected": -0.059217967092990875, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "grad_norm": 1.9609375, | |
| "learning_rate": 3.5355339059327378e-06, | |
| "log_odds_chosen": 0.2911016047000885, | |
| "log_odds_ratio": -0.6208275556564331, | |
| "logits/chosen": 5.865508556365967, | |
| "logits/rejected": 5.9140448570251465, | |
| "logps/chosen": -1.0318800210952759, | |
| "logps/rejected": -1.2233208417892456, | |
| "loss": 0.6888, | |
| "nll_loss": 0.8277570009231567, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.051594000309705734, | |
| "rewards/margins": 0.009572046808898449, | |
| "rewards/rejected": -0.061166055500507355, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "grad_norm": 2.546875, | |
| "learning_rate": 3.450327796711771e-06, | |
| "log_odds_chosen": 0.3929597735404968, | |
| "log_odds_ratio": -0.6252869367599487, | |
| "logits/chosen": 5.480368137359619, | |
| "logits/rejected": 5.818605899810791, | |
| "logps/chosen": -0.8382253646850586, | |
| "logps/rejected": -1.1194109916687012, | |
| "loss": 0.703, | |
| "nll_loss": 0.7914389967918396, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.04191126674413681, | |
| "rewards/margins": 0.014059278182685375, | |
| "rewards/rejected": -0.05597054958343506, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "grad_norm": 2.234375, | |
| "learning_rate": 3.3709993123162106e-06, | |
| "log_odds_chosen": 1.1686198711395264, | |
| "log_odds_ratio": -0.39844751358032227, | |
| "logits/chosen": 4.818378448486328, | |
| "logits/rejected": 5.660789966583252, | |
| "logps/chosen": -0.5040851831436157, | |
| "logps/rejected": -0.9685913324356079, | |
| "loss": 0.6554, | |
| "nll_loss": 0.49605101346969604, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -0.025204259902238846, | |
| "rewards/margins": 0.02322530373930931, | |
| "rewards/rejected": -0.04842956364154816, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "grad_norm": 2.046875, | |
| "learning_rate": 3.296902366978936e-06, | |
| "log_odds_chosen": 0.7159255743026733, | |
| "log_odds_ratio": -0.5276229977607727, | |
| "logits/chosen": 4.3275017738342285, | |
| "logits/rejected": 5.1829423904418945, | |
| "logps/chosen": -0.7593253254890442, | |
| "logps/rejected": -1.0148638486862183, | |
| "loss": 0.6289, | |
| "nll_loss": 0.609928548336029, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -0.03796626627445221, | |
| "rewards/margins": 0.012776928022503853, | |
| "rewards/rejected": -0.05074319988489151, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "grad_norm": 2.375, | |
| "learning_rate": 3.2274861218395142e-06, | |
| "log_odds_chosen": 0.7326894998550415, | |
| "log_odds_ratio": -0.5214331150054932, | |
| "logits/chosen": 4.783654689788818, | |
| "logits/rejected": 5.283537864685059, | |
| "logps/chosen": -0.7465990781784058, | |
| "logps/rejected": -0.9910147786140442, | |
| "loss": 0.6382, | |
| "nll_loss": 0.7347540855407715, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.03732995316386223, | |
| "rewards/margins": 0.012220785021781921, | |
| "rewards/rejected": -0.04955074191093445, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "grad_norm": 2.0625, | |
| "learning_rate": 3.1622776601683796e-06, | |
| "log_odds_chosen": 0.040362291038036346, | |
| "log_odds_ratio": -0.7654204964637756, | |
| "logits/chosen": 4.929324150085449, | |
| "logits/rejected": 4.940483570098877, | |
| "logps/chosen": -0.939703106880188, | |
| "logps/rejected": -0.9395262598991394, | |
| "loss": 0.6626, | |
| "nll_loss": 0.7169132232666016, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": -0.04698516055941582, | |
| "rewards/margins": -8.843839168548584e-06, | |
| "rewards/rejected": -0.04697632044553757, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "grad_norm": 2.59375, | |
| "learning_rate": 3.1008683647302113e-06, | |
| "log_odds_chosen": 0.8304751515388489, | |
| "log_odds_ratio": -0.4627406597137451, | |
| "logits/chosen": 4.34907341003418, | |
| "logits/rejected": 4.541801929473877, | |
| "logps/chosen": -0.7797168493270874, | |
| "logps/rejected": -1.0878037214279175, | |
| "loss": 0.6408, | |
| "nll_loss": 0.6424815058708191, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -0.03898584097623825, | |
| "rewards/margins": 0.015404346399009228, | |
| "rewards/rejected": -0.05439019203186035, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "grad_norm": 2.328125, | |
| "learning_rate": 3.0429030972509227e-06, | |
| "log_odds_chosen": 0.2547241747379303, | |
| "log_odds_ratio": -0.7041358351707458, | |
| "logits/chosen": 4.1212077140808105, | |
| "logits/rejected": 5.139257431030273, | |
| "logps/chosen": -0.5988011360168457, | |
| "logps/rejected": -0.7647382020950317, | |
| "loss": 0.6441, | |
| "nll_loss": 0.4384763836860657, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.029940057545900345, | |
| "rewards/margins": 0.008296851068735123, | |
| "rewards/rejected": -0.03823690861463547, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "grad_norm": 1.9609375, | |
| "learning_rate": 2.988071523335984e-06, | |
| "log_odds_chosen": 0.7432643175125122, | |
| "log_odds_ratio": -0.4928904175758362, | |
| "logits/chosen": 4.240169525146484, | |
| "logits/rejected": 4.746310234069824, | |
| "logps/chosen": -0.7583116292953491, | |
| "logps/rejected": -1.0217373371124268, | |
| "loss": 0.6349, | |
| "nll_loss": 0.5912537574768066, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.03791557624936104, | |
| "rewards/margins": 0.013171288184821606, | |
| "rewards/rejected": -0.05108686536550522, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "grad_norm": 2.5, | |
| "learning_rate": 2.9361010975735177e-06, | |
| "log_odds_chosen": 0.6404408812522888, | |
| "log_odds_ratio": -0.5461726784706116, | |
| "logits/chosen": 4.347890377044678, | |
| "logits/rejected": 5.2955708503723145, | |
| "logps/chosen": -0.8145158886909485, | |
| "logps/rejected": -1.124975323677063, | |
| "loss": 0.6204, | |
| "nll_loss": 0.5651360154151917, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.040725789964199066, | |
| "rewards/margins": 0.015522971749305725, | |
| "rewards/rejected": -0.05624876171350479, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "grad_norm": 1.96875, | |
| "learning_rate": 2.8867513459481293e-06, | |
| "log_odds_chosen": 0.4704459607601166, | |
| "log_odds_ratio": -0.6623938083648682, | |
| "logits/chosen": 4.255876064300537, | |
| "logits/rejected": 5.063040733337402, | |
| "logps/chosen": -0.7718355059623718, | |
| "logps/rejected": -1.144460916519165, | |
| "loss": 0.6404, | |
| "nll_loss": 0.6724303364753723, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.03859177231788635, | |
| "rewards/margins": 0.01863126829266548, | |
| "rewards/rejected": -0.05722304433584213, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "grad_norm": 2.453125, | |
| "learning_rate": 2.839809171235324e-06, | |
| "log_odds_chosen": 1.5952459573745728, | |
| "log_odds_ratio": -0.2707791328430176, | |
| "logits/chosen": 2.7694969177246094, | |
| "logits/rejected": 5.479510307312012, | |
| "logps/chosen": -0.4962679445743561, | |
| "logps/rejected": -1.2316776514053345, | |
| "loss": 0.6428, | |
| "nll_loss": 0.3623020648956299, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.024813394993543625, | |
| "rewards/margins": 0.03677048534154892, | |
| "rewards/rejected": -0.06158388406038284, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "grad_norm": 2.078125, | |
| "learning_rate": 2.7950849718747376e-06, | |
| "log_odds_chosen": 0.4402007460594177, | |
| "log_odds_ratio": -0.5388344526290894, | |
| "logits/chosen": 4.8701372146606445, | |
| "logits/rejected": 4.049181938171387, | |
| "logps/chosen": -0.8427563905715942, | |
| "logps/rejected": -1.1280080080032349, | |
| "loss": 0.6661, | |
| "nll_loss": 0.6774541735649109, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.04213782027363777, | |
| "rewards/margins": 0.014262576587498188, | |
| "rewards/rejected": -0.056400395929813385, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "grad_norm": 1.9765625, | |
| "learning_rate": 2.752409412815902e-06, | |
| "log_odds_chosen": 1.4536019563674927, | |
| "log_odds_ratio": -0.3178521990776062, | |
| "logits/chosen": 4.046222686767578, | |
| "logits/rejected": 4.855486869812012, | |
| "logps/chosen": -0.4614998400211334, | |
| "logps/rejected": -1.0025476217269897, | |
| "loss": 0.6396, | |
| "nll_loss": 0.46759381890296936, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -0.02307499200105667, | |
| "rewards/margins": 0.027052391320466995, | |
| "rewards/rejected": -0.05012737959623337, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "grad_norm": 2.65625, | |
| "learning_rate": 2.711630722733202e-06, | |
| "log_odds_chosen": 0.4552677273750305, | |
| "log_odds_ratio": -0.5441101789474487, | |
| "logits/chosen": 4.233187198638916, | |
| "logits/rejected": 4.776756286621094, | |
| "logps/chosen": -0.9984881281852722, | |
| "logps/rejected": -1.3039405345916748, | |
| "loss": 0.6326, | |
| "nll_loss": 0.7266319990158081, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.04992440715432167, | |
| "rewards/margins": 0.01527262944728136, | |
| "rewards/rejected": -0.06519703567028046, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "grad_norm": 1.9609375, | |
| "learning_rate": 2.6726124191242444e-06, | |
| "log_odds_chosen": 0.3951299488544464, | |
| "log_odds_ratio": -0.6442996263504028, | |
| "logits/chosen": 4.592418193817139, | |
| "logits/rejected": 4.885247707366943, | |
| "logps/chosen": -0.9690208435058594, | |
| "logps/rejected": -1.1191128492355347, | |
| "loss": 0.6271, | |
| "nll_loss": 0.7028160095214844, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.04845104366540909, | |
| "rewards/margins": 0.007504602428525686, | |
| "rewards/rejected": -0.055955640971660614, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "grad_norm": 2.109375, | |
| "learning_rate": 2.6352313834736496e-06, | |
| "log_odds_chosen": 0.6397253274917603, | |
| "log_odds_ratio": -0.4948647916316986, | |
| "logits/chosen": 3.1035220623016357, | |
| "logits/rejected": 4.4074320793151855, | |
| "logps/chosen": -0.7063679695129395, | |
| "logps/rejected": -1.086042881011963, | |
| "loss": 0.6133, | |
| "nll_loss": 0.5209956765174866, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -0.03531839698553085, | |
| "rewards/margins": 0.0189837496727705, | |
| "rewards/rejected": -0.054302144795656204, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "grad_norm": 1.9296875, | |
| "learning_rate": 2.599376224550182e-06, | |
| "log_odds_chosen": 0.5072129368782043, | |
| "log_odds_ratio": -0.5375211834907532, | |
| "logits/chosen": 4.4618144035339355, | |
| "logits/rejected": 4.897726535797119, | |
| "logps/chosen": -0.8658114671707153, | |
| "logps/rejected": -1.161678433418274, | |
| "loss": 0.625, | |
| "nll_loss": 0.7147814035415649, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -0.043290577828884125, | |
| "rewards/margins": 0.014793348498642445, | |
| "rewards/rejected": -0.058083921670913696, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "grad_norm": 2.28125, | |
| "learning_rate": 2.564945880212886e-06, | |
| "log_odds_chosen": 0.5736058950424194, | |
| "log_odds_ratio": -0.4948197305202484, | |
| "logits/chosen": 4.31764554977417, | |
| "logits/rejected": 4.153486251831055, | |
| "logps/chosen": -0.8540223836898804, | |
| "logps/rejected": -1.1471771001815796, | |
| "loss": 0.6393, | |
| "nll_loss": 0.6763076186180115, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.0427011176943779, | |
| "rewards/margins": 0.014657735824584961, | |
| "rewards/rejected": -0.05735884979367256, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "grad_norm": 3.640625, | |
| "learning_rate": 2.5318484177091667e-06, | |
| "log_odds_chosen": 0.8381564021110535, | |
| "log_odds_ratio": -0.5308811068534851, | |
| "logits/chosen": 4.037534236907959, | |
| "logits/rejected": 5.888669013977051, | |
| "logps/chosen": -0.700161337852478, | |
| "logps/rejected": -1.2042081356048584, | |
| "loss": 0.6318, | |
| "nll_loss": 0.5512461066246033, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -0.03500806540250778, | |
| "rewards/margins": 0.025202345103025436, | |
| "rewards/rejected": -0.060210417956113815, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "grad_norm": 2.171875, | |
| "learning_rate": 2.5e-06, | |
| "log_odds_chosen": 0.7038768529891968, | |
| "log_odds_ratio": -0.43052348494529724, | |
| "logits/chosen": 3.822885036468506, | |
| "logits/rejected": 4.210227012634277, | |
| "logps/chosen": -0.6150542497634888, | |
| "logps/rejected": -0.9889954328536987, | |
| "loss": 0.6218, | |
| "nll_loss": 0.5013046264648438, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -0.030752714723348618, | |
| "rewards/margins": 0.01869705691933632, | |
| "rewards/rejected": -0.04944976791739464, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "grad_norm": 2.03125, | |
| "learning_rate": 2.4693239916239746e-06, | |
| "log_odds_chosen": 0.49417972564697266, | |
| "log_odds_ratio": -0.5454962253570557, | |
| "logits/chosen": 3.7158710956573486, | |
| "logits/rejected": 4.625822067260742, | |
| "logps/chosen": -0.7136448621749878, | |
| "logps/rejected": -0.9806584119796753, | |
| "loss": 0.6163, | |
| "nll_loss": 0.5766875147819519, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -0.03568224236369133, | |
| "rewards/margins": 0.013350683264434338, | |
| "rewards/rejected": -0.04903292655944824, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "grad_norm": 1.96875, | |
| "learning_rate": 2.4397501823713327e-06, | |
| "log_odds_chosen": 1.2905668020248413, | |
| "log_odds_ratio": -0.3054632544517517, | |
| "logits/chosen": 4.375031471252441, | |
| "logits/rejected": 5.165828704833984, | |
| "logps/chosen": -0.6634560823440552, | |
| "logps/rejected": -1.2297804355621338, | |
| "loss": 0.6299, | |
| "nll_loss": 0.5654190182685852, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -0.03317280486226082, | |
| "rewards/margins": 0.02831621840596199, | |
| "rewards/rejected": -0.06148902326822281, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "grad_norm": 2.234375, | |
| "learning_rate": 2.411214110852061e-06, | |
| "log_odds_chosen": 0.4614163041114807, | |
| "log_odds_ratio": -0.5477044582366943, | |
| "logits/chosen": 3.945091724395752, | |
| "logits/rejected": 4.783943176269531, | |
| "logps/chosen": -0.670985758304596, | |
| "logps/rejected": -0.8528381586074829, | |
| "loss": 0.6328, | |
| "nll_loss": 0.5353778004646301, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -0.03354928642511368, | |
| "rewards/margins": 0.009092616848647594, | |
| "rewards/rejected": -0.04264190047979355, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "grad_norm": 2.140625, | |
| "learning_rate": 2.3836564731139807e-06, | |
| "log_odds_chosen": 0.519318699836731, | |
| "log_odds_ratio": -0.5034213066101074, | |
| "logits/chosen": 3.990828037261963, | |
| "logits/rejected": 4.283727645874023, | |
| "logps/chosen": -0.7843809723854065, | |
| "logps/rejected": -1.1084554195404053, | |
| "loss": 0.598, | |
| "nll_loss": 0.6064985394477844, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.03921904414892197, | |
| "rewards/margins": 0.01620371639728546, | |
| "rewards/rejected": -0.055422764271497726, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "grad_norm": 2.015625, | |
| "learning_rate": 2.357022603955159e-06, | |
| "log_odds_chosen": 1.2161670923233032, | |
| "log_odds_ratio": -0.5558447241783142, | |
| "logits/chosen": 2.7631869316101074, | |
| "logits/rejected": 4.014997959136963, | |
| "logps/chosen": -0.4891352653503418, | |
| "logps/rejected": -1.057556390762329, | |
| "loss": 0.6063, | |
| "nll_loss": 0.5005042552947998, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -0.02445676364004612, | |
| "rewards/margins": 0.028421055525541306, | |
| "rewards/rejected": -0.052877821028232574, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "grad_norm": 2.0625, | |
| "learning_rate": 2.3312620206007847e-06, | |
| "log_odds_chosen": 0.8278636932373047, | |
| "log_odds_ratio": -0.43884754180908203, | |
| "logits/chosen": 4.009448051452637, | |
| "logits/rejected": 4.671367645263672, | |
| "logps/chosen": -0.7134698629379272, | |
| "logps/rejected": -1.146784782409668, | |
| "loss": 0.5862, | |
| "nll_loss": 0.5619599223136902, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -0.03567349165678024, | |
| "rewards/margins": 0.021665748208761215, | |
| "rewards/rejected": -0.05733924359083176, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "grad_norm": 2.609375, | |
| "learning_rate": 2.3063280200722128e-06, | |
| "log_odds_chosen": 1.677671194076538, | |
| "log_odds_ratio": -0.2895694375038147, | |
| "logits/chosen": 2.985790491104126, | |
| "logits/rejected": 4.190914630889893, | |
| "logps/chosen": -0.5018793344497681, | |
| "logps/rejected": -1.0572091341018677, | |
| "loss": 0.5765, | |
| "nll_loss": 0.5001329183578491, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -0.025093963369727135, | |
| "rewards/margins": 0.02776649035513401, | |
| "rewards/rejected": -0.05286044999957085, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "grad_norm": 2.0, | |
| "learning_rate": 2.2821773229381924e-06, | |
| "log_odds_chosen": 1.0791471004486084, | |
| "log_odds_ratio": -0.37350553274154663, | |
| "logits/chosen": 3.676426649093628, | |
| "logits/rejected": 3.8374907970428467, | |
| "logps/chosen": -0.7438164353370667, | |
| "logps/rejected": -1.29355788230896, | |
| "loss": 0.5652, | |
| "nll_loss": 0.6555451154708862, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -0.037190817296504974, | |
| "rewards/margins": 0.027487074956297874, | |
| "rewards/rejected": -0.0646779015660286, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "grad_norm": 2.21875, | |
| "learning_rate": 2.2587697572631284e-06, | |
| "log_odds_chosen": 0.275502473115921, | |
| "log_odds_ratio": -0.7135687470436096, | |
| "logits/chosen": 4.321534156799316, | |
| "logits/rejected": 4.41732120513916, | |
| "logps/chosen": -0.9727070927619934, | |
| "logps/rejected": -1.0810346603393555, | |
| "loss": 0.5952, | |
| "nll_loss": 0.7110171914100647, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.04863535612821579, | |
| "rewards/margins": 0.005416377447545528, | |
| "rewards/rejected": -0.05405173450708389, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "grad_norm": 2.15625, | |
| "learning_rate": 2.23606797749979e-06, | |
| "log_odds_chosen": 0.34863442182540894, | |
| "log_odds_ratio": -0.6463712453842163, | |
| "logits/chosen": 4.6876606941223145, | |
| "logits/rejected": 5.054124355316162, | |
| "logps/chosen": -0.9338000416755676, | |
| "logps/rejected": -1.1037800312042236, | |
| "loss": 0.5953, | |
| "nll_loss": 0.8528131246566772, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.04669000208377838, | |
| "rewards/margins": 0.008499005809426308, | |
| "rewards/rejected": -0.05518900603055954, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "grad_norm": 2.171875, | |
| "learning_rate": 2.2140372138502386e-06, | |
| "log_odds_chosen": 0.9548345804214478, | |
| "log_odds_ratio": -0.39882007241249084, | |
| "logits/chosen": 3.5289406776428223, | |
| "logits/rejected": 3.8287463188171387, | |
| "logps/chosen": -0.6570809483528137, | |
| "logps/rejected": -1.1388274431228638, | |
| "loss": 0.609, | |
| "nll_loss": 0.5968061685562134, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -0.032854050397872925, | |
| "rewards/margins": 0.024087321013212204, | |
| "rewards/rejected": -0.05694136768579483, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "grad_norm": 1.9609375, | |
| "learning_rate": 2.1926450482675734e-06, | |
| "log_odds_chosen": 0.4539831280708313, | |
| "log_odds_ratio": -0.5872747302055359, | |
| "logits/chosen": 3.2061939239501953, | |
| "logits/rejected": 4.589787006378174, | |
| "logps/chosen": -0.7979894280433655, | |
| "logps/rejected": -1.0285401344299316, | |
| "loss": 0.5827, | |
| "nll_loss": 0.6084668636322021, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.039899468421936035, | |
| "rewards/margins": 0.011527536436915398, | |
| "rewards/rejected": -0.051426999270915985, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "grad_norm": 2.484375, | |
| "learning_rate": 2.1718612138153473e-06, | |
| "log_odds_chosen": 0.8493059277534485, | |
| "log_odds_ratio": -0.6372500658035278, | |
| "logits/chosen": 3.078615665435791, | |
| "logits/rejected": 4.099945068359375, | |
| "logps/chosen": -0.6704202890396118, | |
| "logps/rejected": -0.7899671792984009, | |
| "loss": 0.5788, | |
| "nll_loss": 0.5733928084373474, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.03352101519703865, | |
| "rewards/margins": 0.005977341439574957, | |
| "rewards/rejected": -0.039498358964920044, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "grad_norm": 1.859375, | |
| "learning_rate": 2.151657414559676e-06, | |
| "log_odds_chosen": 0.6374627351760864, | |
| "log_odds_ratio": -0.5592355728149414, | |
| "logits/chosen": 3.680483341217041, | |
| "logits/rejected": 3.9816291332244873, | |
| "logps/chosen": -0.8559755086898804, | |
| "logps/rejected": -1.1612054109573364, | |
| "loss": 0.6003, | |
| "nll_loss": 0.6403124928474426, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.04279877990484238, | |
| "rewards/margins": 0.015261486172676086, | |
| "rewards/rejected": -0.05806026607751846, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "grad_norm": 1.8984375, | |
| "learning_rate": 2.132007163556104e-06, | |
| "log_odds_chosen": 1.399209976196289, | |
| "log_odds_ratio": -0.5735031366348267, | |
| "logits/chosen": 3.132289171218872, | |
| "logits/rejected": 3.5427193641662598, | |
| "logps/chosen": -0.5963010191917419, | |
| "logps/rejected": -0.9639393091201782, | |
| "loss": 0.5984, | |
| "nll_loss": 0.5058175325393677, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.029815051704645157, | |
| "rewards/margins": 0.018381912261247635, | |
| "rewards/rejected": -0.04819696769118309, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "grad_norm": 1.859375, | |
| "learning_rate": 2.1128856368212917e-06, | |
| "log_odds_chosen": 0.688880443572998, | |
| "log_odds_ratio": -0.4902462959289551, | |
| "logits/chosen": 2.6950721740722656, | |
| "logits/rejected": 3.1528286933898926, | |
| "logps/chosen": -0.6383022665977478, | |
| "logps/rejected": -0.9691828489303589, | |
| "loss": 0.5718, | |
| "nll_loss": 0.4289799630641937, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -0.03191510960459709, | |
| "rewards/margins": 0.016544032841920853, | |
| "rewards/rejected": -0.048459142446517944, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "grad_norm": 2.421875, | |
| "learning_rate": 2.0942695414584777e-06, | |
| "log_odds_chosen": 1.3283271789550781, | |
| "log_odds_ratio": -0.3012233078479767, | |
| "logits/chosen": 3.4564871788024902, | |
| "logits/rejected": 4.7043867111206055, | |
| "logps/chosen": -0.6779360771179199, | |
| "logps/rejected": -1.523970365524292, | |
| "loss": 0.6138, | |
| "nll_loss": 0.5768535137176514, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.033896803855895996, | |
| "rewards/margins": 0.042301714420318604, | |
| "rewards/rejected": -0.0761985182762146, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "grad_norm": 1.953125, | |
| "learning_rate": 2.0761369963434992e-06, | |
| "log_odds_chosen": 1.4566174745559692, | |
| "log_odds_ratio": -0.32581037282943726, | |
| "logits/chosen": 2.691676616668701, | |
| "logits/rejected": 4.661564826965332, | |
| "logps/chosen": -0.4493564963340759, | |
| "logps/rejected": -1.0139671564102173, | |
| "loss": 0.5782, | |
| "nll_loss": 0.37120580673217773, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -0.022467825561761856, | |
| "rewards/margins": 0.028230536729097366, | |
| "rewards/rejected": -0.05069836229085922, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "grad_norm": 2.0625, | |
| "learning_rate": 2.058467423981546e-06, | |
| "log_odds_chosen": 1.0190517902374268, | |
| "log_odds_ratio": -0.5730624198913574, | |
| "logits/chosen": 3.407086133956909, | |
| "logits/rejected": 4.482596397399902, | |
| "logps/chosen": -0.7345553040504456, | |
| "logps/rejected": -0.9309635162353516, | |
| "loss": 0.5723, | |
| "nll_loss": 0.5519307851791382, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.03672776371240616, | |
| "rewards/margins": 0.009820410050451756, | |
| "rewards/rejected": -0.04654817283153534, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "grad_norm": 2.375, | |
| "learning_rate": 2.0412414523193154e-06, | |
| "log_odds_chosen": 1.107779860496521, | |
| "log_odds_ratio": -0.40593117475509644, | |
| "logits/chosen": 3.215078830718994, | |
| "logits/rejected": 4.503358840942383, | |
| "logps/chosen": -0.663019597530365, | |
| "logps/rejected": -1.2786920070648193, | |
| "loss": 0.5815, | |
| "nll_loss": 0.5633824467658997, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -0.03315097838640213, | |
| "rewards/margins": 0.030783619731664658, | |
| "rewards/rejected": -0.06393460184335709, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "grad_norm": 2.09375, | |
| "learning_rate": 2.0244408254472904e-06, | |
| "log_odds_chosen": 0.7602224349975586, | |
| "log_odds_ratio": -0.5018362998962402, | |
| "logits/chosen": 3.604353666305542, | |
| "logits/rejected": 4.481316089630127, | |
| "logps/chosen": -0.7105517387390137, | |
| "logps/rejected": -1.0740478038787842, | |
| "loss": 0.5873, | |
| "nll_loss": 0.5312780737876892, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.035527586936950684, | |
| "rewards/margins": 0.018174810335040092, | |
| "rewards/rejected": -0.05370239168405533, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "grad_norm": 1.90625, | |
| "learning_rate": 2.0080483222562476e-06, | |
| "log_odds_chosen": 1.3286904096603394, | |
| "log_odds_ratio": -0.36574870347976685, | |
| "logits/chosen": 3.620469331741333, | |
| "logits/rejected": 4.373411655426025, | |
| "logps/chosen": -0.4990506172180176, | |
| "logps/rejected": -0.953050971031189, | |
| "loss": 0.5716, | |
| "nll_loss": 0.5527733564376831, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -0.024952532723546028, | |
| "rewards/margins": 0.022700021043419838, | |
| "rewards/rejected": -0.047652553766965866, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "grad_norm": 2.359375, | |
| "learning_rate": 1.9920476822239895e-06, | |
| "log_odds_chosen": 0.4847317636013031, | |
| "log_odds_ratio": -0.5640643835067749, | |
| "logits/chosen": 3.125113010406494, | |
| "logits/rejected": 3.340205669403076, | |
| "logps/chosen": -0.8360971212387085, | |
| "logps/rejected": -1.0480194091796875, | |
| "loss": 0.5738, | |
| "nll_loss": 0.6136351823806763, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.041804857552051544, | |
| "rewards/margins": 0.010596117004752159, | |
| "rewards/rejected": -0.05240097641944885, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "grad_norm": 2.09375, | |
| "learning_rate": 1.976423537605237e-06, | |
| "log_odds_chosen": 0.8931509256362915, | |
| "log_odds_ratio": -0.40087467432022095, | |
| "logits/chosen": 3.574153423309326, | |
| "logits/rejected": 4.537802219390869, | |
| "logps/chosen": -0.6440940499305725, | |
| "logps/rejected": -1.088226556777954, | |
| "loss": 0.5846, | |
| "nll_loss": 0.5598152875900269, | |
| "rewards/accuracies": 0.8999999761581421, | |
| "rewards/chosen": -0.032204702496528625, | |
| "rewards/margins": 0.0222066268324852, | |
| "rewards/rejected": -0.054411329329013824, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 2.5625, | |
| "learning_rate": 1.961161351381841e-06, | |
| "log_odds_chosen": 1.2053475379943848, | |
| "log_odds_ratio": -0.430248886346817, | |
| "logits/chosen": 2.245370388031006, | |
| "logits/rejected": 3.5309462547302246, | |
| "logps/chosen": -0.5642444491386414, | |
| "logps/rejected": -0.9910544157028198, | |
| "loss": 0.5605, | |
| "nll_loss": 0.45886915922164917, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.028212225064635277, | |
| "rewards/margins": 0.021340493112802505, | |
| "rewards/rejected": -0.04955272004008293, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 651, | |
| "total_flos": 0.0, | |
| "train_loss": 0.812556631554107, | |
| "train_runtime": 4771.9621, | |
| "train_samples_per_second": 4.358, | |
| "train_steps_per_second": 0.136 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 651, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "total_flos": 0.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |