{ "run_name": "run_large_20260115_191350", "timestamp": "20260115_191350", "phase": "large", "config": { "arch_layout": [ "m4", [ "T1m4", [ "T22" ], "m4T1" ], "m4" ], "d_model": [ 1024, 1024, 1536 ], "d_intermediate": [ 0, 2816, 4096 ], "vocab_size": 256, "ssm_cfg": { "chunk_size": 256, "d_conv": 4, "d_state": 128, "expand": 2 }, "attn_cfg": { "num_heads": [ 16, 16, 16 ], "rotary_emb_dim": [ 32, 32, 48 ], "window_size": [ 1023, 1023, -1 ] }, "tie_embeddings": false }, "training_args": { "data": "datasets/PI1M/PI1M_v2.csv", "max_samples": null, "batch_size": 16, "epochs": 5, "lr": 0.0001, "weight_decay": 0.1, "gradient_accumulation": 8, "concatenate": true, "num_concatenate": 10, "concatenate_separator": " ", "checkpoint_bytes": 1000000, "num_test_samples": 5, "num_visualize": 5, "skip_visualization": false }, "dataset_info": { "train_size": 99574, "test_size": 5, "test_smiles_file": "checkpoints/run_large_20260115_191350/test_smiles.txt" }, "model_info": { "num_parameters": 622923776, "device": "cuda", "dtype": "torch.bfloat16", "use_amp": true }, "training_history": [ { "checkpoint_type": "bytes", "bytes_threshold": 1000000, "cumulative_training_bytes": 1000166, "metrics": { "loss": 3.0352404484382043, "ce_loss": 3.0252403846153846, "lb_loss": 0.9999999889960656 } }, { "checkpoint_type": "bytes", "bytes_threshold": 2000000, "cumulative_training_bytes": 2000240, "metrics": { "loss": 2.107340772335346, "ce_loss": 2.097340745192308, "lb_loss": 0.9999999871620765 } }, { "checkpoint_type": "bytes", "bytes_threshold": 3000000, "cumulative_training_bytes": 3001794, "metrics": { "loss": 1.7094185730380476, "ce_loss": 1.6994185581841432, "lb_loss": 0.9999999873473516 } }, { "checkpoint_type": "bytes", "bytes_threshold": 4000000, "cumulative_training_bytes": 4002359, "metrics": { "loss": 1.47650072853762, "ce_loss": 1.4665007197696738, "lb_loss": 0.9999999890171863 } }, { "checkpoint_type": "bytes", "bytes_threshold": 5000000, "cumulative_training_bytes": 5005670, "metrics": { "loss": 1.3171558716545808, "ce_loss": 1.3071558665644172, "lb_loss": 0.9999999897611653 } }, { "checkpoint_type": "bytes", "bytes_threshold": 6000000, "cumulative_training_bytes": 6001321, "metrics": { "loss": 1.2017559169808312, "ce_loss": 1.1917559143222507, "lb_loss": 0.9999999908535072 } }, { "checkpoint_type": "bytes", "bytes_threshold": 7000000, "cumulative_training_bytes": 7001673, "metrics": { "loss": 1.1151093587948484, "ce_loss": 1.1051093578860898, "lb_loss": 0.9999999904684795 } }, { "checkpoint_type": "bytes", "bytes_threshold": 8000000, "cumulative_training_bytes": 8004669, "metrics": { "loss": 1.0468063034773787, "ce_loss": 1.0368063038793103, "lb_loss": 0.9999999897804297 } }, { "checkpoint_type": "bytes", "bytes_threshold": 9000000, "cumulative_training_bytes": 9006752, "metrics": { "loss": 0.9919913549626127, "ce_loss": 0.9819913563829787, "lb_loss": 0.9999999897023465 } }, { "checkpoint_type": "bytes", "bytes_threshold": 10000000, "cumulative_training_bytes": 10007281, "metrics": { "loss": 0.9471440684010387, "ce_loss": 0.9371440706355283, "lb_loss": 0.9999999893660932 } }, { "checkpoint_type": "bytes", "bytes_threshold": 11000000, "cumulative_training_bytes": 11001365, "metrics": { "loss": 0.9100927569407938, "ce_loss": 0.900092759836351, "lb_loss": 0.999999989540132 } }, { "checkpoint_type": "bytes", "bytes_threshold": 12000000, "cumulative_training_bytes": 12005386, "metrics": { "loss": 0.8784949809940438, "ce_loss": 0.868494984444799, "lb_loss": 0.999999989882045 } }, { "checkpoint_type": "bytes", "bytes_threshold": 13000000, "cumulative_training_bytes": 13001269, "metrics": { "loss": 0.8592479796569771, "ce_loss": 0.849247983573954, "lb_loss": 0.999999989954668 } }, { "checkpoint_type": "bytes", "bytes_threshold": 14000000, "cumulative_training_bytes": 14005280, "metrics": { "loss": 0.8378439935604906, "ce_loss": 0.8278439978801969, "lb_loss": 0.9999999899245978 } }, { "checkpoint_type": "bytes", "bytes_threshold": 15000000, "cumulative_training_bytes": 15001797, "metrics": { "loss": 0.8179623213681307, "ce_loss": 0.8079623260342186, "lb_loss": 0.9999999895889742 } }, { "checkpoint_type": "bytes", "bytes_threshold": 16000000, "cumulative_training_bytes": 16003308, "metrics": { "loss": 0.7999628585397256, "ce_loss": 0.7899628635112494, "lb_loss": 0.999999989471463 } }, { "checkpoint_type": "bytes", "bytes_threshold": 17000000, "cumulative_training_bytes": 17001780, "metrics": { "loss": 0.783798369592028, "ce_loss": 0.773798374831005, "lb_loss": 0.9999999887720858 } }, { "checkpoint_type": "bytes", "bytes_threshold": 18000000, "cumulative_training_bytes": 18002585, "metrics": { "loss": 0.7691971354788922, "ce_loss": 0.7591971409574468, "lb_loss": 0.9999999888399814 } }, { "checkpoint_type": "bytes", "bytes_threshold": 19000000, "cumulative_training_bytes": 19004388, "metrics": { "loss": 0.7562685100266358, "ce_loss": 0.746268515719468, "lb_loss": 0.9999999887325359 } }, { "checkpoint_type": "bytes", "bytes_threshold": 20000000, "cumulative_training_bytes": 20001795, "metrics": { "loss": 0.7443181650561906, "ce_loss": 0.7343181709418071, "lb_loss": 0.9999999887043265 } }, { "checkpoint_type": "bytes", "bytes_threshold": 21000000, "cumulative_training_bytes": 21006219, "metrics": { "loss": 0.7334088699425653, "ce_loss": 0.723408876002552, "lb_loss": 0.9999999888743791 } }, { "checkpoint_type": "bytes", "bytes_threshold": 22000000, "cumulative_training_bytes": 22003647, "metrics": { "loss": 0.7233542565306926, "ce_loss": 0.7133542627479986, "lb_loss": 0.9999999891080966 } }, { "checkpoint_type": "bytes", "bytes_threshold": 23000000, "cumulative_training_bytes": 23000855, "metrics": { "loss": 0.7141935865044633, "ce_loss": 0.7041935928654679, "lb_loss": 0.9999999891627919 } }, { "checkpoint_type": "bytes", "bytes_threshold": 24000000, "cumulative_training_bytes": 24007583, "metrics": { "loss": 0.7056202586567953, "ce_loss": 0.6956202651515152, "lb_loss": 0.9999999891818045 } }, { "checkpoint_type": "bytes", "bytes_threshold": 25000000, "cumulative_training_bytes": 25004319, "metrics": { "loss": 0.6978230217149393, "ce_loss": 0.687823028330781, "lb_loss": 0.9999999895577774 } }, { "checkpoint_type": "bytes", "bytes_threshold": 26000000, "cumulative_training_bytes": 26000600, "metrics": { "loss": 0.6906206210337261, "ce_loss": 0.6806206277614139, "lb_loss": 0.9999999897293911 } }, { "checkpoint_type": "bytes", "bytes_threshold": 27000000, "cumulative_training_bytes": 27007515, "metrics": { "loss": 0.6838098439610576, "ce_loss": 0.6738098507938758, "lb_loss": 0.9999999897926835 } }, { "checkpoint_type": "bytes", "bytes_threshold": 28000000, "cumulative_training_bytes": 28003023, "metrics": { "loss": 0.6774992880874688, "ce_loss": 0.6674992950164069, "lb_loss": 0.9999999895687797 } }, { "checkpoint_type": "bytes", "bytes_threshold": 29000000, "cumulative_training_bytes": 29003935, "metrics": { "loss": 0.6715684946638226, "ce_loss": 0.6615685016829461, "lb_loss": 0.9999999895046732 } }, { "checkpoint_type": "bytes", "bytes_threshold": 30000000, "cumulative_training_bytes": 30001066, "metrics": { "loss": 0.6660281601701846, "ce_loss": 0.6560281672728433, "lb_loss": 0.9999999894573715 } }, { "checkpoint_type": "bytes", "bytes_threshold": 31000000, "cumulative_training_bytes": 31004436, "metrics": { "loss": 0.6609612641201458, "ce_loss": 0.6509612713015559, "lb_loss": 0.9999999894746058 } }, { "checkpoint_type": "bytes", "bytes_threshold": 32000000, "cumulative_training_bytes": 32006649, "metrics": { "loss": 0.6561554203763533, "ce_loss": 0.646155427631579, "lb_loss": 0.9999999895050194 } }, { "checkpoint_type": "bytes", "bytes_threshold": 33000000, "cumulative_training_bytes": 33004203, "metrics": { "loss": 0.6516305574961438, "ce_loss": 0.6416305648201857, "lb_loss": 0.9999999895588151 } }, { "checkpoint_type": "bytes", "bytes_threshold": 34000000, "cumulative_training_bytes": 34006104, "metrics": { "loss": 0.6472530922785559, "ce_loss": 0.6372530996678676, "lb_loss": 0.9999999896520646 } }, { "checkpoint_type": "bytes", "bytes_threshold": 35000000, "cumulative_training_bytes": 35005618, "metrics": { "loss": 0.6431124474281974, "ce_loss": 0.6331124548785824, "lb_loss": 0.9999999896725271 } }, { "checkpoint_type": "bytes", "bytes_threshold": 36000000, "cumulative_training_bytes": 36002823, "metrics": { "loss": 0.6391829455870056, "ce_loss": 0.6291829530950862, "lb_loss": 0.9999999896918579 } }, { "checkpoint_type": "bytes", "bytes_threshold": 37000000, "cumulative_training_bytes": 37006427, "metrics": { "loss": 0.6354130913090232, "ce_loss": 0.6254130988721026, "lb_loss": 0.9999999896752716 } }, { "checkpoint_type": "bytes", "bytes_threshold": 38000000, "cumulative_training_bytes": 38005922, "metrics": { "loss": 0.6318843585695924, "ce_loss": 0.6218843661847673, "lb_loss": 0.9999999897196099 } }, { "checkpoint_type": "bytes", "bytes_threshold": 39000000, "cumulative_training_bytes": 39004443, "metrics": { "loss": 0.6285198655931632, "ce_loss": 0.6185198732577543, "lb_loss": 0.9999999895276488 } }, { "checkpoint_type": "bytes", "bytes_threshold": 40000000, "cumulative_training_bytes": 40005613, "metrics": { "loss": 0.6254313996155814, "ce_loss": 0.615431407326761, "lb_loss": 0.9999999897083863 } }, { "checkpoint_type": "bytes", "bytes_threshold": 41000000, "cumulative_training_bytes": 41003596, "metrics": { "loss": 0.6224746753085582, "ce_loss": 0.6124746830640643, "lb_loss": 0.9999999896242941 } }, { "checkpoint_type": "bytes", "bytes_threshold": 42000000, "cumulative_training_bytes": 42004130, "metrics": { "loss": 0.619576180100767, "ce_loss": 0.609576187898815, "lb_loss": 0.9999999894482935 } }, { "checkpoint_type": "bytes", "bytes_threshold": 43000000, "cumulative_training_bytes": 43002856, "metrics": { "loss": 0.6168661168497852, "ce_loss": 0.6068661246883903, "lb_loss": 0.9999999894715442 } }, { "checkpoint_type": "bytes", "bytes_threshold": 44000000, "cumulative_training_bytes": 44000615, "metrics": { "loss": 0.6142508432585481, "ce_loss": 0.6042508511355725, "lb_loss": 0.9999999894192938 } }, { "checkpoint_type": "bytes", "bytes_threshold": 45000000, "cumulative_training_bytes": 45002728, "metrics": { "loss": 0.6117183565789184, "ce_loss": 0.6017183644929386, "lb_loss": 0.9999999893305962 } }, { "checkpoint_type": "bytes", "bytes_threshold": 46000000, "cumulative_training_bytes": 46000713, "metrics": { "loss": 0.6093004826243594, "ce_loss": 0.5993004905734975, "lb_loss": 0.9999999892538988 } }, { "checkpoint_type": "bytes", "bytes_threshold": 47000000, "cumulative_training_bytes": 47001586, "metrics": { "loss": 0.6069603338424916, "ce_loss": 0.5969603418255132, "lb_loss": 0.999999989075395 } }, { "epoch": 1, "checkpoint_type": "epoch", "metrics": { "loss": 0.6054869050538325, "ce_loss": 0.5954869130583226, "lb_loss": 0.9999999890922734, "training_bytes": 47653409 }, "cumulative_training_bytes": 47653409, "training_bytes_this_epoch": 47653409 }, { "checkpoint_type": "bytes", "bytes_threshold": 48000000, "cumulative_training_bytes": 48006676, "metrics": { "loss": 0.49496941981108294, "ce_loss": 0.4849694293478261, "lb_loss": 0.9999999935212343 } }, { "checkpoint_type": "bytes", "bytes_threshold": 49000000, "cumulative_training_bytes": 49000759, "metrics": { "loss": 0.49630592086098413, "ce_loss": 0.4863059303977273, "lb_loss": 0.9999999932267449 } }, { "checkpoint_type": "bytes", "bytes_threshold": 50000000, "cumulative_training_bytes": 50005240, "metrics": { "loss": 0.4959718451049506, "ce_loss": 0.4859718546416938, "lb_loss": 0.9999999914573148 } }, { "checkpoint_type": "bytes", "bytes_threshold": 51000000, "cumulative_training_bytes": 51007539, "metrics": { "loss": 0.49752317824864495, "ce_loss": 0.4875231877853881, "lb_loss": 0.9999999910184781 } }, { "checkpoint_type": "bytes", "bytes_threshold": 52000000, "cumulative_training_bytes": 52002554, "metrics": { "loss": 0.4988107849174822, "ce_loss": 0.4888107944542254, "lb_loss": 0.9999999891914112 } }, { "checkpoint_type": "bytes", "bytes_threshold": 53000000, "cumulative_training_bytes": 53005306, "metrics": { "loss": 0.49884286868214095, "ce_loss": 0.4888428782188841, "lb_loss": 0.9999999886589159 } }, { "checkpoint_type": "bytes", "bytes_threshold": 54000000, "cumulative_training_bytes": 54000123, "metrics": { "loss": 0.49843673654287085, "ce_loss": 0.488436746079614, "lb_loss": 0.9999999882803895 } }, { "checkpoint_type": "bytes", "bytes_threshold": 55000000, "cumulative_training_bytes": 55003152, "metrics": { "loss": 0.4980025132497152, "ce_loss": 0.48800252278645834, "lb_loss": 0.9999999890724818 } }, { "checkpoint_type": "bytes", "bytes_threshold": 56000000, "cumulative_training_bytes": 56002937, "metrics": { "loss": 0.4978086235979956, "ce_loss": 0.48780863313473877, "lb_loss": 0.9999999890733924 } }, { "checkpoint_type": "bytes", "bytes_threshold": 57000000, "cumulative_training_bytes": 57004703, "metrics": { "loss": 0.4975252436342879, "ce_loss": 0.48752525317103107, "lb_loss": 0.9999999889765551 } }, { "checkpoint_type": "bytes", "bytes_threshold": 58000000, "cumulative_training_bytes": 58002959, "metrics": { "loss": 0.49715732681680713, "ce_loss": 0.4871573363535503, "lb_loss": 0.9999999886698271 } }, { "checkpoint_type": "bytes", "bytes_threshold": 59000000, "cumulative_training_bytes": 59000108, "metrics": { "loss": 0.4970432515893526, "ce_loss": 0.48704326112609575, "lb_loss": 0.9999999883443378 } }, { "checkpoint_type": "bytes", "bytes_threshold": 60000000, "cumulative_training_bytes": 60007478, "metrics": { "loss": 0.4969303793951454, "ce_loss": 0.48693038893188856, "lb_loss": 0.9999999884481401 } }, { "checkpoint_type": "bytes", "bytes_threshold": 61000000, "cumulative_training_bytes": 61002660, "metrics": { "loss": 0.49673105242600757, "ce_loss": 0.48673106196275073, "lb_loss": 0.9999999883864875 } }, { "checkpoint_type": "bytes", "bytes_threshold": 62000000, "cumulative_training_bytes": 62003465, "metrics": { "loss": 0.49654987219300095, "ce_loss": 0.4865498817297441, "lb_loss": 0.9999999883713753 } }, { "checkpoint_type": "bytes", "bytes_threshold": 63000000, "cumulative_training_bytes": 63000868, "metrics": { "loss": 0.4964099013555799, "ce_loss": 0.48640991089232305, "lb_loss": 0.9999999887089905 } }, { "checkpoint_type": "bytes", "bytes_threshold": 64000000, "cumulative_training_bytes": 64003546, "metrics": { "loss": 0.49635096437528303, "ce_loss": 0.4863509739120262, "lb_loss": 0.9999999889827633 } }, { "checkpoint_type": "bytes", "bytes_threshold": 65000000, "cumulative_training_bytes": 65001846, "metrics": { "loss": 0.4962221452934289, "ce_loss": 0.48622215483017206, "lb_loss": 0.9999999886680185 } }, { "checkpoint_type": "bytes", "bytes_threshold": 66000000, "cumulative_training_bytes": 66004938, "metrics": { "loss": 0.4961587034532485, "ce_loss": 0.48615871298999164, "lb_loss": 0.9999999882679765 } }, { "checkpoint_type": "bytes", "bytes_threshold": 67000000, "cumulative_training_bytes": 67000216, "metrics": { "loss": 0.49601907669743406, "ce_loss": 0.4860190862341772, "lb_loss": 0.9999999884704623 } }, { "checkpoint_type": "bytes", "bytes_threshold": 68000000, "cumulative_training_bytes": 68000224, "metrics": { "loss": 0.4964207015242049, "ce_loss": 0.4864207110609481, "lb_loss": 0.9999999881822244 } }, { "checkpoint_type": "bytes", "bytes_threshold": 69000000, "cumulative_training_bytes": 69005372, "metrics": { "loss": 0.49684213258408866, "ce_loss": 0.4868421421208318, "lb_loss": 0.9999999881602821 } }, { "checkpoint_type": "bytes", "bytes_threshold": 70000000, "cumulative_training_bytes": 70001864, "metrics": { "loss": 0.497037369488608, "ce_loss": 0.48703737902535116, "lb_loss": 0.9999999881770848 } }, { "checkpoint_type": "bytes", "bytes_threshold": 71000000, "cumulative_training_bytes": 71000907, "metrics": { "loss": 0.49706029712117744, "ce_loss": 0.4870603066579206, "lb_loss": 0.9999999880360634 } }, { "checkpoint_type": "bytes", "bytes_threshold": 72000000, "cumulative_training_bytes": 72005398, "metrics": { "loss": 0.49712042088778513, "ce_loss": 0.4871204304245283, "lb_loss": 0.9999999880790711 } }, { "checkpoint_type": "bytes", "bytes_threshold": 73000000, "cumulative_training_bytes": 73003962, "metrics": { "loss": 0.49715716096929913, "ce_loss": 0.4871571705060423, "lb_loss": 0.9999999879890338 } }, { "checkpoint_type": "bytes", "bytes_threshold": 74000000, "cumulative_training_bytes": 74006324, "metrics": { "loss": 0.4971806565123705, "ce_loss": 0.48718066604911364, "lb_loss": 0.9999999879612822 } }, { "checkpoint_type": "bytes", "bytes_threshold": 75000000, "cumulative_training_bytes": 75002178, "metrics": { "loss": 0.4972360369138309, "ce_loss": 0.48723604645057406, "lb_loss": 0.999999987898805 } }, { "checkpoint_type": "bytes", "bytes_threshold": 76000000, "cumulative_training_bytes": 76006119, "metrics": { "loss": 0.49723345379388895, "ce_loss": 0.4872334633306321, "lb_loss": 0.9999999879728066 } }, { "checkpoint_type": "bytes", "bytes_threshold": 77000000, "cumulative_training_bytes": 77005284, "metrics": { "loss": 0.4972499007815454, "ce_loss": 0.48724991031828857, "lb_loss": 0.9999999881039516 } }, { "checkpoint_type": "bytes", "bytes_threshold": 78000000, "cumulative_training_bytes": 78007177, "metrics": { "loss": 0.4972263361683527, "ce_loss": 0.4872263457050959, "lb_loss": 0.9999999881362097 } }, { "checkpoint_type": "bytes", "bytes_threshold": 79000000, "cumulative_training_bytes": 79001491, "metrics": { "loss": 0.4971963830499691, "ce_loss": 0.48719639258671227, "lb_loss": 0.9999999881780725 } }, { "checkpoint_type": "bytes", "bytes_threshold": 80000000, "cumulative_training_bytes": 80002957, "metrics": { "loss": 0.49715744238633375, "ce_loss": 0.4871574519230769, "lb_loss": 0.9999999881778243 } }, { "checkpoint_type": "bytes", "bytes_threshold": 81000000, "cumulative_training_bytes": 81002131, "metrics": { "loss": 0.4970846991314543, "ce_loss": 0.4870847086681975, "lb_loss": 0.9999999881201305 } }, { "checkpoint_type": "bytes", "bytes_threshold": 82000000, "cumulative_training_bytes": 82000379, "metrics": { "loss": 0.497049108552869, "ce_loss": 0.48704911808961215, "lb_loss": 0.9999999881481625 } }, { "checkpoint_type": "bytes", "bytes_threshold": 83000000, "cumulative_training_bytes": 83002326, "metrics": { "loss": 0.49690102084670573, "ce_loss": 0.4869010303834489, "lb_loss": 0.9999999881849545 } }, { "checkpoint_type": "bytes", "bytes_threshold": 84000000, "cumulative_training_bytes": 84004823, "metrics": { "loss": 0.4968436548828903, "ce_loss": 0.48684366441963345, "lb_loss": 0.9999999882473252 } }, { "checkpoint_type": "bytes", "bytes_threshold": 85000000, "cumulative_training_bytes": 85001132, "metrics": { "loss": 0.496751819840948, "ce_loss": 0.4867518293776912, "lb_loss": 0.9999999883161697 } }, { "checkpoint_type": "bytes", "bytes_threshold": 86000000, "cumulative_training_bytes": 86000628, "metrics": { "loss": 0.4967399565175699, "ce_loss": 0.4867399660543131, "lb_loss": 0.9999999883718574 } }, { "checkpoint_type": "bytes", "bytes_threshold": 87000000, "cumulative_training_bytes": 87000672, "metrics": { "loss": 0.49681193101589355, "ce_loss": 0.4868119405526367, "lb_loss": 0.9999999883783122 } }, { "checkpoint_type": "bytes", "bytes_threshold": 88000000, "cumulative_training_bytes": 88002075, "metrics": { "loss": 0.49670176321425324, "ce_loss": 0.4867017727509964, "lb_loss": 0.9999999882917427 } }, { "checkpoint_type": "bytes", "bytes_threshold": 89000000, "cumulative_training_bytes": 89004728, "metrics": { "loss": 0.49663121152807166, "ce_loss": 0.4866312210648148, "lb_loss": 0.9999999883770943 } }, { "checkpoint_type": "bytes", "bytes_threshold": 90000000, "cumulative_training_bytes": 90003725, "metrics": { "loss": 0.49656294246108723, "ce_loss": 0.4865629519978304, "lb_loss": 0.999999988555391 } }, { "checkpoint_type": "bytes", "bytes_threshold": 91000000, "cumulative_training_bytes": 91002611, "metrics": { "loss": 0.4965044176845958, "ce_loss": 0.48650442722133896, "lb_loss": 0.9999999886813296 } }, { "checkpoint_type": "bytes", "bytes_threshold": 92000000, "cumulative_training_bytes": 92003164, "metrics": { "loss": 0.4964984069213024, "ce_loss": 0.4864984164580456, "lb_loss": 0.9999999888961651 } }, { "checkpoint_type": "bytes", "bytes_threshold": 93000000, "cumulative_training_bytes": 93001402, "metrics": { "loss": 0.49645113397473944, "ce_loss": 0.4864511435114826, "lb_loss": 0.999999989119787 } }, { "checkpoint_type": "bytes", "bytes_threshold": 94000000, "cumulative_training_bytes": 94007638, "metrics": { "loss": 0.4963942520052126, "ce_loss": 0.48639426154195575, "lb_loss": 0.9999999891207247 } }, { "checkpoint_type": "bytes", "bytes_threshold": 95000000, "cumulative_training_bytes": 95004271, "metrics": { "loss": 0.4963107445261611, "ce_loss": 0.48631075406290425, "lb_loss": 0.9999999891373812 } }, { "epoch": 2, "checkpoint_type": "epoch", "metrics": { "loss": 0.4962876345627106, "ce_loss": 0.48628764409945374, "lb_loss": 0.999999989168886, "training_bytes": 47653416 }, "cumulative_training_bytes": 95306825, "training_bytes_this_epoch": 47653416 }, { "checkpoint_type": "bytes", "bytes_threshold": 96000000, "cumulative_training_bytes": 96003218, "metrics": { "loss": 0.49025411134237773, "ce_loss": 0.4802541208791209, "lb_loss": 0.9999999908300546 } }, { "checkpoint_type": "bytes", "bytes_threshold": 97000000, "cumulative_training_bytes": 97000816, "metrics": { "loss": 0.4910255136533021, "ce_loss": 0.48102552319004527, "lb_loss": 0.9999999905603504 } }, { "checkpoint_type": "bytes", "bytes_threshold": 98000000, "cumulative_training_bytes": 98005358, "metrics": { "loss": 0.49233333855107553, "ce_loss": 0.4823333480878187, "lb_loss": 0.9999999910508607 } }, { "checkpoint_type": "bytes", "bytes_threshold": 99000000, "cumulative_training_bytes": 99000141, "metrics": { "loss": 0.4918436110636709, "ce_loss": 0.4818436206004141, "lb_loss": 0.999999992102076 } }, { "checkpoint_type": "bytes", "bytes_threshold": 100000000, "cumulative_training_bytes": 100005926, "metrics": { "loss": 0.4912067290626054, "ce_loss": 0.48120673859934854, "lb_loss": 0.9999999912631629 } }, { "checkpoint_type": "bytes", "bytes_threshold": 101000000, "cumulative_training_bytes": 101001458, "metrics": { "loss": 0.4909990244014289, "ce_loss": 0.48099903393817206, "lb_loss": 0.999999990947144 } }, { "checkpoint_type": "bytes", "bytes_threshold": 102000000, "cumulative_training_bytes": 102004630, "metrics": { "loss": 0.49028549532595705, "ce_loss": 0.4802855048627002, "lb_loss": 0.9999999912707156 } }, { "checkpoint_type": "bytes", "bytes_threshold": 103000000, "cumulative_training_bytes": 103004382, "metrics": { "loss": 0.490558137229426, "ce_loss": 0.48055814676616915, "lb_loss": 0.99999999092586 } }, { "checkpoint_type": "bytes", "bytes_threshold": 104000000, "cumulative_training_bytes": 104002283, "metrics": { "loss": 0.49042572008880747, "ce_loss": 0.48042572962555063, "lb_loss": 0.9999999908623717 } }, { "checkpoint_type": "bytes", "bytes_threshold": 105000000, "cumulative_training_bytes": 105006513, "metrics": { "loss": 0.49059360480816605, "ce_loss": 0.4805936143449092, "lb_loss": 0.9999999903559967 } }, { "checkpoint_type": "bytes", "bytes_threshold": 106000000, "cumulative_training_bytes": 106006613, "metrics": { "loss": 0.4903415147116462, "ce_loss": 0.4803415242483894, "lb_loss": 0.9999999906561079 } }, { "checkpoint_type": "bytes", "bytes_threshold": 107000000, "cumulative_training_bytes": 107005607, "metrics": { "loss": 0.4906465298378475, "ce_loss": 0.4806465393745907, "lb_loss": 0.9999999903976801 } }, { "checkpoint_type": "bytes", "bytes_threshold": 108000000, "cumulative_training_bytes": 108001197, "metrics": { "loss": 0.4906608704421343, "ce_loss": 0.48066087997887746, "lb_loss": 0.9999999902877164 } }, { "checkpoint_type": "bytes", "bytes_threshold": 109000000, "cumulative_training_bytes": 109001691, "metrics": { "loss": 0.49069485728372664, "ce_loss": 0.4806948668204698, "lb_loss": 0.9999999900325566 } }, { "checkpoint_type": "bytes", "bytes_threshold": 110000000, "cumulative_training_bytes": 110007304, "metrics": { "loss": 0.4906437990875403, "ce_loss": 0.48064380862428346, "lb_loss": 0.9999999899985953 } }, { "checkpoint_type": "bytes", "bytes_threshold": 111000000, "cumulative_training_bytes": 111006246, "metrics": { "loss": 0.49070311546325684, "ce_loss": 0.480703125, "lb_loss": 0.9999999900562008 } }, { "checkpoint_type": "bytes", "bytes_threshold": 112000000, "cumulative_training_bytes": 112006808, "metrics": { "loss": 0.4907320227878786, "ce_loss": 0.48073203232462175, "lb_loss": 0.9999999894783181 } }, { "checkpoint_type": "bytes", "bytes_threshold": 113000000, "cumulative_training_bytes": 113006280, "metrics": { "loss": 0.4907356900739835, "ce_loss": 0.48073569961072665, "lb_loss": 0.999999989610436 } }, { "checkpoint_type": "bytes", "bytes_threshold": 114000000, "cumulative_training_bytes": 114000244, "metrics": { "loss": 0.4906710912515451, "ce_loss": 0.4806711007882883, "lb_loss": 0.9999999897974031 } }, { "checkpoint_type": "bytes", "bytes_threshold": 115000000, "cumulative_training_bytes": 115000090, "metrics": { "loss": 0.49064408903496304, "ce_loss": 0.4806440985717062, "lb_loss": 0.9999999897608811 } }, { "checkpoint_type": "bytes", "bytes_threshold": 116000000, "cumulative_training_bytes": 116003964, "metrics": { "loss": 0.4908688999492036, "ce_loss": 0.48086890948594674, "lb_loss": 0.9999999897499409 } }, { "checkpoint_type": "bytes", "bytes_threshold": 117000000, "cumulative_training_bytes": 117001141, "metrics": { "loss": 0.49077886969755463, "ce_loss": 0.4807788792342978, "lb_loss": 0.9999999896522636 } }, { "checkpoint_type": "bytes", "bytes_threshold": 118000000, "cumulative_training_bytes": 118002964, "metrics": { "loss": 0.49081061967910844, "ce_loss": 0.4808106292158516, "lb_loss": 0.9999999897073936 } }, { "checkpoint_type": "bytes", "bytes_threshold": 119000000, "cumulative_training_bytes": 119004829, "metrics": { "loss": 0.49074190038735244, "ce_loss": 0.4807419099240956, "lb_loss": 0.9999999899118753 } }, { "checkpoint_type": "bytes", "bytes_threshold": 120000000, "cumulative_training_bytes": 120005174, "metrics": { "loss": 0.49069510202198013, "ce_loss": 0.4806951115587233, "lb_loss": 0.999999989785755 } }, { "checkpoint_type": "bytes", "bytes_threshold": 121000000, "cumulative_training_bytes": 121000398, "metrics": { "loss": 0.4906328099449369, "ce_loss": 0.4806328194816801, "lb_loss": 0.9999999898084403 } }, { "checkpoint_type": "bytes", "bytes_threshold": 122000000, "cumulative_training_bytes": 122005153, "metrics": { "loss": 0.4905734521533371, "ce_loss": 0.48057346169008025, "lb_loss": 0.9999999895931111 } }, { "checkpoint_type": "bytes", "bytes_threshold": 123000000, "cumulative_training_bytes": 123002062, "metrics": { "loss": 0.49056105234136627, "ce_loss": 0.48056106187810943, "lb_loss": 0.9999999894398626 } }, { "checkpoint_type": "bytes", "bytes_threshold": 124000000, "cumulative_training_bytes": 124006089, "metrics": { "loss": 0.4904723872690717, "ce_loss": 0.4804723968058149, "lb_loss": 0.9999999896498737 } }, { "checkpoint_type": "bytes", "bytes_threshold": 125000000, "cumulative_training_bytes": 125006477, "metrics": { "loss": 0.4903383307249222, "ce_loss": 0.4803383402616654, "lb_loss": 0.9999999898584517 } }, { "checkpoint_type": "bytes", "bytes_threshold": 126000000, "cumulative_training_bytes": 126002630, "metrics": { "loss": 0.49058030584739254, "ce_loss": 0.4805803153841357, "lb_loss": 0.9999999897561486 } }, { "checkpoint_type": "bytes", "bytes_threshold": 127000000, "cumulative_training_bytes": 127007067, "metrics": { "loss": 0.49066594004055153, "ce_loss": 0.4806659495772947, "lb_loss": 0.9999999898067419 } }, { "checkpoint_type": "bytes", "bytes_threshold": 128000000, "cumulative_training_bytes": 128000583, "metrics": { "loss": 0.49058034760611396, "ce_loss": 0.48058035714285713, "lb_loss": 0.999999989768102 } }, { "checkpoint_type": "bytes", "bytes_threshold": 129000000, "cumulative_training_bytes": 129007289, "metrics": { "loss": 0.4905069065050655, "ce_loss": 0.4805069160418087, "lb_loss": 0.9999999897476218 } }, { "checkpoint_type": "bytes", "bytes_threshold": 130000000, "cumulative_training_bytes": 130006166, "metrics": { "loss": 0.49045753542133275, "ce_loss": 0.4804575449580759, "lb_loss": 0.9999999899782128 } }, { "checkpoint_type": "bytes", "bytes_threshold": 131000000, "cumulative_training_bytes": 131001304, "metrics": { "loss": 0.4904289406187695, "ce_loss": 0.4804289501555127, "lb_loss": 0.9999999901426038 } }, { "checkpoint_type": "bytes", "bytes_threshold": 132000000, "cumulative_training_bytes": 132007108, "metrics": { "loss": 0.4903701265992885, "ce_loss": 0.4803701361360317, "lb_loss": 0.9999999899394623 } }, { "checkpoint_type": "bytes", "bytes_threshold": 133000000, "cumulative_training_bytes": 133003089, "metrics": { "loss": 0.49030012820954977, "ce_loss": 0.48030013774629293, "lb_loss": 0.9999999899266576 } }, { "checkpoint_type": "bytes", "bytes_threshold": 134000000, "cumulative_training_bytes": 134000170, "metrics": { "loss": 0.49024726003084046, "ce_loss": 0.4802472695675836, "lb_loss": 0.999999989902716 } }, { "checkpoint_type": "bytes", "bytes_threshold": 135000000, "cumulative_training_bytes": 135007268, "metrics": { "loss": 0.4902310506127265, "ce_loss": 0.48023106014946965, "lb_loss": 0.999999989883879 } }, { "checkpoint_type": "bytes", "bytes_threshold": 136000000, "cumulative_training_bytes": 136002367, "metrics": { "loss": 0.49015822482355786, "ce_loss": 0.48015823436030103, "lb_loss": 0.9999999898845927 } }, { "checkpoint_type": "bytes", "bytes_threshold": 137000000, "cumulative_training_bytes": 137002293, "metrics": { "loss": 0.49018864670178053, "ce_loss": 0.4801886562385237, "lb_loss": 0.9999999900512997 } }, { "checkpoint_type": "bytes", "bytes_threshold": 138000000, "cumulative_training_bytes": 138004174, "metrics": { "loss": 0.4901451457887006, "ce_loss": 0.4801451553254438, "lb_loss": 0.9999999901139867 } }, { "checkpoint_type": "bytes", "bytes_threshold": 139000000, "cumulative_training_bytes": 139006240, "metrics": { "loss": 0.4903390567955974, "ce_loss": 0.4803390663323406, "lb_loss": 0.999999990163354 } }, { "checkpoint_type": "bytes", "bytes_threshold": 140000000, "cumulative_training_bytes": 140006436, "metrics": { "loss": 0.49048212032282185, "ce_loss": 0.480482129859565, "lb_loss": 0.9999999901594661 } }, { "checkpoint_type": "bytes", "bytes_threshold": 141000000, "cumulative_training_bytes": 141007445, "metrics": { "loss": 0.4905080058343408, "ce_loss": 0.48050801537108395, "lb_loss": 0.9999999901041711 } }, { "checkpoint_type": "bytes", "bytes_threshold": 142000000, "cumulative_training_bytes": 142004918, "metrics": { "loss": 0.4905039665249063, "ce_loss": 0.48050397606164946, "lb_loss": 0.9999999901685075 } }, { "epoch": 3, "checkpoint_type": "epoch", "metrics": { "loss": 0.49051486986155374, "ce_loss": 0.4805148793982969, "lb_loss": 0.9999999901265442, "training_bytes": 47653391 }, "cumulative_training_bytes": 142960216, "training_bytes_this_epoch": 47653391 }, { "checkpoint_type": "bytes", "bytes_threshold": 143000000, "cumulative_training_bytes": 143005202, "metrics": { "loss": 0.4950260321299235, "ce_loss": 0.4850260416666667, "lb_loss": 0.9999999701976776 } }, { "checkpoint_type": "bytes", "bytes_threshold": 144000000, "cumulative_training_bytes": 144006005, "metrics": { "loss": 0.4904259713026729, "ce_loss": 0.48042598083941607, "lb_loss": 0.9999999908635216 } }, { "checkpoint_type": "bytes", "bytes_threshold": 145000000, "cumulative_training_bytes": 145001749, "metrics": { "loss": 0.4900371510437812, "ce_loss": 0.48003716058052437, "lb_loss": 0.9999999908472268 } }, { "checkpoint_type": "bytes", "bytes_threshold": 146000000, "cumulative_training_bytes": 146005280, "metrics": { "loss": 0.4904491602627556, "ce_loss": 0.48044916979949875, "lb_loss": 0.9999999887961194 } }, { "checkpoint_type": "bytes", "bytes_threshold": 147000000, "cumulative_training_bytes": 147006364, "metrics": { "loss": 0.49022183598212477, "ce_loss": 0.48022184551886793, "lb_loss": 0.9999999902158413 } }, { "checkpoint_type": "bytes", "bytes_threshold": 148000000, "cumulative_training_bytes": 148004606, "metrics": { "loss": 0.4898206580768932, "ce_loss": 0.47982066761363634, "lb_loss": 0.9999999900658926 } }, { "checkpoint_type": "bytes", "bytes_threshold": 149000000, "cumulative_training_bytes": 149001684, "metrics": { "loss": 0.48951690106452267, "ce_loss": 0.47951691060126583, "lb_loss": 0.9999999901161918 } }, { "checkpoint_type": "bytes", "bytes_threshold": 150000000, "cumulative_training_bytes": 150003252, "metrics": { "loss": 0.4902524334599995, "ce_loss": 0.4802524429967427, "lb_loss": 0.9999999897099473 } }, { "checkpoint_type": "bytes", "bytes_threshold": 151000000, "cumulative_training_bytes": 151004021, "metrics": { "loss": 0.4901546794499362, "ce_loss": 0.48015468898667935, "lb_loss": 0.9999999898484954 } }, { "checkpoint_type": "bytes", "bytes_threshold": 152000000, "cumulative_training_bytes": 152003583, "metrics": { "loss": 0.4901396364200731, "ce_loss": 0.48013964595681624, "lb_loss": 0.9999999896032542 } }, { "checkpoint_type": "bytes", "bytes_threshold": 153000000, "cumulative_training_bytes": 153004258, "metrics": { "loss": 0.49013379143505564, "ce_loss": 0.4801338009717988, "lb_loss": 0.9999999890058506 } }, { "checkpoint_type": "bytes", "bytes_threshold": 154000000, "cumulative_training_bytes": 154004288, "metrics": { "loss": 0.4900680994376158, "ce_loss": 0.480068108974359, "lb_loss": 0.999999989632179 } }, { "checkpoint_type": "bytes", "bytes_threshold": 155000000, "cumulative_training_bytes": 155004149, "metrics": { "loss": 0.4901411515178947, "ce_loss": 0.4801411610546379, "lb_loss": 0.9999999897755691 } }, { "checkpoint_type": "bytes", "bytes_threshold": 156000000, "cumulative_training_bytes": 156001930, "metrics": { "loss": 0.4899712896123179, "ce_loss": 0.47997129914906106, "lb_loss": 0.9999999894012868 } }, { "checkpoint_type": "bytes", "bytes_threshold": 157000000, "cumulative_training_bytes": 157005966, "metrics": { "loss": 0.4899014294959544, "ce_loss": 0.47990143903269755, "lb_loss": 0.9999999894758012 } }, { "checkpoint_type": "bytes", "bytes_threshold": 158000000, "cumulative_training_bytes": 158006659, "metrics": { "loss": 0.48980809543528125, "ce_loss": 0.4798081049720244, "lb_loss": 0.9999999895403854 } }, { "checkpoint_type": "bytes", "bytes_threshold": 159000000, "cumulative_training_bytes": 159001028, "metrics": { "loss": 0.4895588359286506, "ce_loss": 0.4795588454653938, "lb_loss": 0.9999999895585181 } }, { "checkpoint_type": "bytes", "bytes_threshold": 160000000, "cumulative_training_bytes": 160001860, "metrics": { "loss": 0.4894983198657726, "ce_loss": 0.47949832940251574, "lb_loss": 0.9999999894232549 } }, { "checkpoint_type": "bytes", "bytes_threshold": 161000000, "cumulative_training_bytes": 161000396, "metrics": { "loss": 0.4892045148159733, "ce_loss": 0.4792045243527165, "lb_loss": 0.9999999891972906 } }, { "checkpoint_type": "bytes", "bytes_threshold": 162000000, "cumulative_training_bytes": 162002358, "metrics": { "loss": 0.4891760811347486, "ce_loss": 0.47917609067149175, "lb_loss": 0.9999999890233505 } }, { "checkpoint_type": "bytes", "bytes_threshold": 163000000, "cumulative_training_bytes": 163000910, "metrics": { "loss": 0.4890335630177085, "ce_loss": 0.47903357255445167, "lb_loss": 0.9999999890675471 } }, { "checkpoint_type": "bytes", "bytes_threshold": 164000000, "cumulative_training_bytes": 164005597, "metrics": { "loss": 0.48890226029586237, "ce_loss": 0.47890226983260553, "lb_loss": 0.9999999888729321 } }, { "checkpoint_type": "bytes", "bytes_threshold": 165000000, "cumulative_training_bytes": 165002975, "metrics": { "loss": 0.4889194060730553, "ce_loss": 0.47891941560979845, "lb_loss": 0.9999999890234671 } }, { "checkpoint_type": "bytes", "bytes_threshold": 166000000, "cumulative_training_bytes": 166007294, "metrics": { "loss": 0.48903683825322025, "ce_loss": 0.4790368477899634, "lb_loss": 0.9999999888872696 } }, { "checkpoint_type": "bytes", "bytes_threshold": 167000000, "cumulative_training_bytes": 167001945, "metrics": { "loss": 0.4890494737780068, "ce_loss": 0.47904948331474995, "lb_loss": 0.9999999891006479 } }, { "checkpoint_type": "bytes", "bytes_threshold": 168000000, "cumulative_training_bytes": 168005336, "metrics": { "loss": 0.48906435342565363, "ce_loss": 0.4790643629623968, "lb_loss": 0.9999999890849336 } }, { "checkpoint_type": "bytes", "bytes_threshold": 169000000, "cumulative_training_bytes": 169002071, "metrics": { "loss": 0.48898078195840533, "ce_loss": 0.4789807914951485, "lb_loss": 0.9999999892392673 } }, { "checkpoint_type": "bytes", "bytes_threshold": 170000000, "cumulative_training_bytes": 170002507, "metrics": { "loss": 0.48883532836328514, "ce_loss": 0.4788353379000283, "lb_loss": 0.9999999893484761 } }, { "checkpoint_type": "bytes", "bytes_threshold": 171000000, "cumulative_training_bytes": 171005319, "metrics": { "loss": 0.48872788846981063, "ce_loss": 0.4787278980065538, "lb_loss": 0.9999999894365335 } }, { "checkpoint_type": "bytes", "bytes_threshold": 172000000, "cumulative_training_bytes": 172007475, "metrics": { "loss": 0.4886464073825819, "ce_loss": 0.4786464169193251, "lb_loss": 0.999999989424222 } }, { "checkpoint_type": "bytes", "bytes_threshold": 173000000, "cumulative_training_bytes": 173006995, "metrics": { "loss": 0.48865697313400097, "ce_loss": 0.47865698267074414, "lb_loss": 0.9999999893671633 } }, { "checkpoint_type": "bytes", "bytes_threshold": 174000000, "cumulative_training_bytes": 174002372, "metrics": { "loss": 0.48858499138826916, "ce_loss": 0.4785850009250123, "lb_loss": 0.9999999893993713 } }, { "checkpoint_type": "bytes", "bytes_threshold": 175000000, "cumulative_training_bytes": 175000872, "metrics": { "loss": 0.48849087463510193, "ce_loss": 0.4784908841718451, "lb_loss": 0.9999999894580696 } }, { "checkpoint_type": "bytes", "bytes_threshold": 176000000, "cumulative_training_bytes": 176007018, "metrics": { "loss": 0.4885006819310511, "ce_loss": 0.4785006914677943, "lb_loss": 0.9999999893523677 } }, { "checkpoint_type": "bytes", "bytes_threshold": 177000000, "cumulative_training_bytes": 177003062, "metrics": { "loss": 0.4884071085188124, "ce_loss": 0.4784071180555556, "lb_loss": 0.9999999894492003 } }, { "checkpoint_type": "bytes", "bytes_threshold": 178000000, "cumulative_training_bytes": 178005739, "metrics": { "loss": 0.4883760760542553, "ce_loss": 0.4783760855909985, "lb_loss": 0.9999999893214313 } }, { "checkpoint_type": "bytes", "bytes_threshold": 179000000, "cumulative_training_bytes": 179002039, "metrics": { "loss": 0.48841644468038026, "ce_loss": 0.4784164542171234, "lb_loss": 0.9999999892871193 } }, { "checkpoint_type": "bytes", "bytes_threshold": 180000000, "cumulative_training_bytes": 180001975, "metrics": { "loss": 0.4885168265783871, "ce_loss": 0.47851683611513024, "lb_loss": 0.9999999893307933 } }, { "checkpoint_type": "bytes", "bytes_threshold": 181000000, "cumulative_training_bytes": 181002156, "metrics": { "loss": 0.4885435228641423, "ce_loss": 0.4785435324008855, "lb_loss": 0.9999999895041126 } }, { "checkpoint_type": "bytes", "bytes_threshold": 182000000, "cumulative_training_bytes": 182006789, "metrics": { "loss": 0.48842715038972745, "ce_loss": 0.4784271599264706, "lb_loss": 0.9999999895516564 } }, { "checkpoint_type": "bytes", "bytes_threshold": 183000000, "cumulative_training_bytes": 183001003, "metrics": { "loss": 0.4883744527003505, "ce_loss": 0.4783744622370937, "lb_loss": 0.9999999895720363 } }, { "checkpoint_type": "bytes", "bytes_threshold": 184000000, "cumulative_training_bytes": 184002846, "metrics": { "loss": 0.4883268971737586, "ce_loss": 0.47832690671050176, "lb_loss": 0.9999999894599509 } }, { "checkpoint_type": "bytes", "bytes_threshold": 185000000, "cumulative_training_bytes": 185004724, "metrics": { "loss": 0.4882663181899502, "ce_loss": 0.47826632772669336, "lb_loss": 0.9999999894400365 } }, { "checkpoint_type": "bytes", "bytes_threshold": 186000000, "cumulative_training_bytes": 186007260, "metrics": { "loss": 0.48828150444600626, "ce_loss": 0.47828151398274943, "lb_loss": 0.9999999894740508 } }, { "checkpoint_type": "bytes", "bytes_threshold": 187000000, "cumulative_training_bytes": 187007019, "metrics": { "loss": 0.4882290801773191, "ce_loss": 0.47822908971406225, "lb_loss": 0.9999999894010861 } }, { "checkpoint_type": "bytes", "bytes_threshold": 188000000, "cumulative_training_bytes": 188003736, "metrics": { "loss": 0.488216156216274, "ce_loss": 0.4782161657530172, "lb_loss": 0.9999999894326629 } }, { "checkpoint_type": "bytes", "bytes_threshold": 189000000, "cumulative_training_bytes": 189007403, "metrics": { "loss": 0.4881525303701408, "ce_loss": 0.47815253990688394, "lb_loss": 0.9999999895538252 } }, { "checkpoint_type": "bytes", "bytes_threshold": 190000000, "cumulative_training_bytes": 190003337, "metrics": { "loss": 0.4880743821461995, "ce_loss": 0.4780743916829427, "lb_loss": 0.9999999895905299 } }, { "epoch": 4, "checkpoint_type": "epoch", "metrics": { "loss": 0.488021058104645, "ce_loss": 0.4780210676413882, "lb_loss": 0.9999999895423727, "training_bytes": 47653398 }, "cumulative_training_bytes": 190613614, "training_bytes_this_epoch": 47653398 }, { "checkpoint_type": "bytes", "bytes_threshold": 191000000, "cumulative_training_bytes": 191004295, "metrics": { "loss": 0.48361365467894313, "ce_loss": 0.4736136642156863, "lb_loss": 0.9999999906502518 } }, { "checkpoint_type": "bytes", "bytes_threshold": 192000000, "cumulative_training_bytes": 192003486, "metrics": { "loss": 0.4822246106290027, "ce_loss": 0.47222462016574585, "lb_loss": 0.9999999911086994 } }, { "checkpoint_type": "bytes", "bytes_threshold": 193000000, "cumulative_training_bytes": 193000756, "metrics": { "loss": 0.48206590686197065, "ce_loss": 0.4720659163987138, "lb_loss": 0.9999999875424376 } }, { "checkpoint_type": "bytes", "bytes_threshold": 194000000, "cumulative_training_bytes": 194006438, "metrics": { "loss": 0.4826827534723066, "ce_loss": 0.47268276300904977, "lb_loss": 0.9999999888072726 } }, { "checkpoint_type": "bytes", "bytes_threshold": 195000000, "cumulative_training_bytes": 195005382, "metrics": { "loss": 0.48297037944927085, "ce_loss": 0.472970388986014, "lb_loss": 0.9999999882249565 } }, { "checkpoint_type": "bytes", "bytes_threshold": 196000000, "cumulative_training_bytes": 196002015, "metrics": { "loss": 0.4832766776071315, "ce_loss": 0.47327668714387466, "lb_loss": 0.999999988792289 } }, { "checkpoint_type": "bytes", "bytes_threshold": 197000000, "cumulative_training_bytes": 197006361, "metrics": { "loss": 0.48392085377260935, "ce_loss": 0.4739208633093525, "lb_loss": 0.9999999878503721 } }, { "checkpoint_type": "bytes", "bytes_threshold": 198000000, "cumulative_training_bytes": 198003880, "metrics": { "loss": 0.483928608201846, "ce_loss": 0.4739286177385892, "lb_loss": 0.99999998751023 } }, { "checkpoint_type": "bytes", "bytes_threshold": 199000000, "cumulative_training_bytes": 199006196, "metrics": { "loss": 0.48404037288334817, "ce_loss": 0.47404038242009133, "lb_loss": 0.9999999879702041 } }, { "checkpoint_type": "bytes", "bytes_threshold": 200000000, "cumulative_training_bytes": 200002073, "metrics": { "loss": 0.4839540720959099, "ce_loss": 0.47395408163265307, "lb_loss": 0.999999988760267 } }, { "checkpoint_type": "bytes", "bytes_threshold": 201000000, "cumulative_training_bytes": 201002611, "metrics": { "loss": 0.4842147074617819, "ce_loss": 0.4742147169985251, "lb_loss": 0.9999999887032495 } }, { "checkpoint_type": "bytes", "bytes_threshold": 202000000, "cumulative_training_bytes": 202000755, "metrics": { "loss": 0.48400739288586786, "ce_loss": 0.474007402422611, "lb_loss": 0.9999999884480903 } }, { "checkpoint_type": "bytes", "bytes_threshold": 203000000, "cumulative_training_bytes": 203001562, "metrics": { "loss": 0.4841745324391381, "ce_loss": 0.47417454197588127, "lb_loss": 0.9999999887573181 } }, { "checkpoint_type": "bytes", "bytes_threshold": 204000000, "cumulative_training_bytes": 204005682, "metrics": { "loss": 0.48423728844666647, "ce_loss": 0.47423729798340963, "lb_loss": 0.9999999889179008 } }, { "checkpoint_type": "bytes", "bytes_threshold": 205000000, "cumulative_training_bytes": 205003502, "metrics": { "loss": 0.484211044443555, "ce_loss": 0.47421105398029817, "lb_loss": 0.9999999888598348 } }, { "checkpoint_type": "bytes", "bytes_threshold": 206000000, "cumulative_training_bytes": 206008019, "metrics": { "loss": 0.48419132477137033, "ce_loss": 0.4741913343081135, "lb_loss": 0.9999999889632016 } }, { "checkpoint_type": "bytes", "bytes_threshold": 207000000, "cumulative_training_bytes": 207007717, "metrics": { "loss": 0.4842689376011073, "ce_loss": 0.47426894713785045, "lb_loss": 0.9999999892488818 } }, { "checkpoint_type": "bytes", "bytes_threshold": 208000000, "cumulative_training_bytes": 208005208, "metrics": { "loss": 0.4841757200888075, "ce_loss": 0.47417572962555066, "lb_loss": 0.999999989313176 } }, { "checkpoint_type": "bytes", "bytes_threshold": 209000000, "cumulative_training_bytes": 209005329, "metrics": { "loss": 0.4841251532236735, "ce_loss": 0.47412516276041666, "lb_loss": 0.9999999895443519 } }, { "checkpoint_type": "bytes", "bytes_threshold": 210000000, "cumulative_training_bytes": 210006121, "metrics": { "loss": 0.4841085443945093, "ce_loss": 0.47410855393125245, "lb_loss": 0.9999999895909708 } }, { "checkpoint_type": "bytes", "bytes_threshold": 211000000, "cumulative_training_bytes": 211003532, "metrics": { "loss": 0.48414475511940114, "ce_loss": 0.4741447646561443, "lb_loss": 0.9999999894947094 } }, { "checkpoint_type": "bytes", "bytes_threshold": 212000000, "cumulative_training_bytes": 212007723, "metrics": { "loss": 0.48415256366347176, "ce_loss": 0.4741525732002149, "lb_loss": 0.9999999894966027 } }, { "checkpoint_type": "bytes", "bytes_threshold": 213000000, "cumulative_training_bytes": 213005205, "metrics": { "loss": 0.48423584617656507, "ce_loss": 0.47423585571330823, "lb_loss": 0.9999999894371515 } }, { "checkpoint_type": "bytes", "bytes_threshold": 214000000, "cumulative_training_bytes": 214007542, "metrics": { "loss": 0.4842972747625365, "ce_loss": 0.47429728429927964, "lb_loss": 0.9999999894023176 } }, { "checkpoint_type": "bytes", "bytes_threshold": 215000000, "cumulative_training_bytes": 215006636, "metrics": { "loss": 0.48420266889447544, "ce_loss": 0.4742026784312186, "lb_loss": 0.9999999897414117 } }, { "checkpoint_type": "bytes", "bytes_threshold": 216000000, "cumulative_training_bytes": 216002411, "metrics": { "loss": 0.48442725453235763, "ce_loss": 0.4744272640691008, "lb_loss": 0.9999999896582165 } }, { "checkpoint_type": "bytes", "bytes_threshold": 217000000, "cumulative_training_bytes": 217003351, "metrics": { "loss": 0.48465302021652246, "ce_loss": 0.4746530297532656, "lb_loss": 0.9999999896708351 } }, { "checkpoint_type": "bytes", "bytes_threshold": 218000000, "cumulative_training_bytes": 218001934, "metrics": { "loss": 0.48466454465906106, "ce_loss": 0.4746645541958042, "lb_loss": 0.9999999894795718 } }, { "checkpoint_type": "bytes", "bytes_threshold": 219000000, "cumulative_training_bytes": 219001498, "metrics": { "loss": 0.48470003685327034, "ce_loss": 0.4747000463900135, "lb_loss": 0.9999999894626067 } }, { "checkpoint_type": "bytes", "bytes_threshold": 220000000, "cumulative_training_bytes": 220000802, "metrics": { "loss": 0.48471733482440416, "ce_loss": 0.4747173443611473, "lb_loss": 0.9999999896022145 } }, { "checkpoint_type": "bytes", "bytes_threshold": 221000000, "cumulative_training_bytes": 221002631, "metrics": { "loss": 0.4847172157847394, "ce_loss": 0.4747172253214826, "lb_loss": 0.9999999895699386 } }, { "checkpoint_type": "bytes", "bytes_threshold": 222000000, "cumulative_training_bytes": 222001546, "metrics": { "loss": 0.48476429971392804, "ce_loss": 0.4747643092506712, "lb_loss": 0.9999999897434038 } }, { "checkpoint_type": "bytes", "bytes_threshold": 223000000, "cumulative_training_bytes": 223002095, "metrics": { "loss": 0.4846939223298289, "ce_loss": 0.47469393186657205, "lb_loss": 0.9999999896217131 } }, { "checkpoint_type": "bytes", "bytes_threshold": 224000000, "cumulative_training_bytes": 224004704, "metrics": { "loss": 0.4846698684175937, "ce_loss": 0.47466987795433685, "lb_loss": 0.9999999897695562 } }, { "checkpoint_type": "bytes", "bytes_threshold": 225000000, "cumulative_training_bytes": 225002022, "metrics": { "loss": 0.48464635646704474, "ce_loss": 0.4746463660037879, "lb_loss": 0.9999999898666792 } }, { "checkpoint_type": "bytes", "bytes_threshold": 226000000, "cumulative_training_bytes": 226003031, "metrics": { "loss": 0.4846968945550516, "ce_loss": 0.47469690409179477, "lb_loss": 0.999999989960508 } }, { "checkpoint_type": "bytes", "bytes_threshold": 227000000, "cumulative_training_bytes": 227007213, "metrics": { "loss": 0.48466569770009893, "ce_loss": 0.4746657072368421, "lb_loss": 0.9999999899738713 } }, { "checkpoint_type": "bytes", "bytes_threshold": 228000000, "cumulative_training_bytes": 228002618, "metrics": { "loss": 0.4847170054192085, "ce_loss": 0.47471701495595164, "lb_loss": 0.9999999898766133 } }, { "checkpoint_type": "bytes", "bytes_threshold": 229000000, "cumulative_training_bytes": 229002817, "metrics": { "loss": 0.4849277419846056, "ce_loss": 0.47492775152134875, "lb_loss": 0.9999999898439013 } }, { "checkpoint_type": "bytes", "bytes_threshold": 230000000, "cumulative_training_bytes": 230004657, "metrics": { "loss": 0.48519230282958, "ce_loss": 0.47519231236632314, "lb_loss": 0.9999999897664978 } }, { "checkpoint_type": "bytes", "bytes_threshold": 231000000, "cumulative_training_bytes": 231006924, "metrics": { "loss": 0.4853118831206326, "ce_loss": 0.4753118926573758, "lb_loss": 0.9999999898737654 } }, { "checkpoint_type": "bytes", "bytes_threshold": 232000000, "cumulative_training_bytes": 232007018, "metrics": { "loss": 0.4854375916427203, "ce_loss": 0.4754376011794635, "lb_loss": 0.9999999898104178 } }, { "checkpoint_type": "bytes", "bytes_threshold": 233000000, "cumulative_training_bytes": 233006236, "metrics": { "loss": 0.48551042782778, "ce_loss": 0.47551043736452314, "lb_loss": 0.9999999898362022 } }, { "checkpoint_type": "bytes", "bytes_threshold": 234000000, "cumulative_training_bytes": 234000486, "metrics": { "loss": 0.4855104365451488, "ce_loss": 0.47551044608189197, "lb_loss": 0.9999999898169262 } }, { "checkpoint_type": "bytes", "bytes_threshold": 235000000, "cumulative_training_bytes": 235002824, "metrics": { "loss": 0.485555099787045, "ce_loss": 0.47555510932378814, "lb_loss": 0.9999999898311206 } }, { "checkpoint_type": "bytes", "bytes_threshold": 236000000, "cumulative_training_bytes": 236004788, "metrics": { "loss": 0.48557505530384387, "ce_loss": 0.47557506484058704, "lb_loss": 0.9999999897843591 } }, { "checkpoint_type": "bytes", "bytes_threshold": 237000000, "cumulative_training_bytes": 237001532, "metrics": { "loss": 0.4854937203452311, "ce_loss": 0.47549372988197425, "lb_loss": 0.999999989826477 } }, { "checkpoint_type": "bytes", "bytes_threshold": 238000000, "cumulative_training_bytes": 238004993, "metrics": { "loss": 0.485468689970447, "ce_loss": 0.4754686995071902, "lb_loss": 0.9999999898203087 } }, { "epoch": 5, "checkpoint_type": "epoch", "metrics": { "loss": 0.48547107517566046, "ce_loss": 0.4754710847124036, "lb_loss": 0.9999999897626342, "training_bytes": 47653400 }, "cumulative_training_bytes": 238267014, "training_bytes_this_epoch": 47653400 } ] }