| { | |
| "run_name": "run_large_20260115_191350", | |
| "timestamp": "20260115_191350", | |
| "phase": "large", | |
| "config": { | |
| "arch_layout": [ | |
| "m4", | |
| [ | |
| "T1m4", | |
| [ | |
| "T22" | |
| ], | |
| "m4T1" | |
| ], | |
| "m4" | |
| ], | |
| "d_model": [ | |
| 1024, | |
| 1024, | |
| 1536 | |
| ], | |
| "d_intermediate": [ | |
| 0, | |
| 2816, | |
| 4096 | |
| ], | |
| "vocab_size": 256, | |
| "ssm_cfg": { | |
| "chunk_size": 256, | |
| "d_conv": 4, | |
| "d_state": 128, | |
| "expand": 2 | |
| }, | |
| "attn_cfg": { | |
| "num_heads": [ | |
| 16, | |
| 16, | |
| 16 | |
| ], | |
| "rotary_emb_dim": [ | |
| 32, | |
| 32, | |
| 48 | |
| ], | |
| "window_size": [ | |
| 1023, | |
| 1023, | |
| -1 | |
| ] | |
| }, | |
| "tie_embeddings": false | |
| }, | |
| "training_args": { | |
| "data": "datasets/PI1M/PI1M_v2.csv", | |
| "max_samples": null, | |
| "batch_size": 16, | |
| "epochs": 5, | |
| "lr": 0.0001, | |
| "weight_decay": 0.1, | |
| "gradient_accumulation": 8, | |
| "concatenate": true, | |
| "num_concatenate": 10, | |
| "concatenate_separator": " ", | |
| "checkpoint_bytes": 1000000, | |
| "num_test_samples": 5, | |
| "num_visualize": 5, | |
| "skip_visualization": false | |
| }, | |
| "dataset_info": { | |
| "train_size": 99574, | |
| "test_size": 5, | |
| "test_smiles_file": "checkpoints/run_large_20260115_191350/test_smiles.txt" | |
| }, | |
| "model_info": { | |
| "num_parameters": 622923776, | |
| "device": "cuda", | |
| "dtype": "torch.bfloat16", | |
| "use_amp": true | |
| }, | |
| "training_history": [ | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 1000000, | |
| "cumulative_training_bytes": 1000166, | |
| "metrics": { | |
| "loss": 3.0352404484382043, | |
| "ce_loss": 3.0252403846153846, | |
| "lb_loss": 0.9999999889960656 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 2000000, | |
| "cumulative_training_bytes": 2000240, | |
| "metrics": { | |
| "loss": 2.107340772335346, | |
| "ce_loss": 2.097340745192308, | |
| "lb_loss": 0.9999999871620765 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 3000000, | |
| "cumulative_training_bytes": 3001794, | |
| "metrics": { | |
| "loss": 1.7094185730380476, | |
| "ce_loss": 1.6994185581841432, | |
| "lb_loss": 0.9999999873473516 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 4000000, | |
| "cumulative_training_bytes": 4002359, | |
| "metrics": { | |
| "loss": 1.47650072853762, | |
| "ce_loss": 1.4665007197696738, | |
| "lb_loss": 0.9999999890171863 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 5000000, | |
| "cumulative_training_bytes": 5005670, | |
| "metrics": { | |
| "loss": 1.3171558716545808, | |
| "ce_loss": 1.3071558665644172, | |
| "lb_loss": 0.9999999897611653 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 6000000, | |
| "cumulative_training_bytes": 6001321, | |
| "metrics": { | |
| "loss": 1.2017559169808312, | |
| "ce_loss": 1.1917559143222507, | |
| "lb_loss": 0.9999999908535072 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 7000000, | |
| "cumulative_training_bytes": 7001673, | |
| "metrics": { | |
| "loss": 1.1151093587948484, | |
| "ce_loss": 1.1051093578860898, | |
| "lb_loss": 0.9999999904684795 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 8000000, | |
| "cumulative_training_bytes": 8004669, | |
| "metrics": { | |
| "loss": 1.0468063034773787, | |
| "ce_loss": 1.0368063038793103, | |
| "lb_loss": 0.9999999897804297 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 9000000, | |
| "cumulative_training_bytes": 9006752, | |
| "metrics": { | |
| "loss": 0.9919913549626127, | |
| "ce_loss": 0.9819913563829787, | |
| "lb_loss": 0.9999999897023465 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 10000000, | |
| "cumulative_training_bytes": 10007281, | |
| "metrics": { | |
| "loss": 0.9471440684010387, | |
| "ce_loss": 0.9371440706355283, | |
| "lb_loss": 0.9999999893660932 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 11000000, | |
| "cumulative_training_bytes": 11001365, | |
| "metrics": { | |
| "loss": 0.9100927569407938, | |
| "ce_loss": 0.900092759836351, | |
| "lb_loss": 0.999999989540132 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 12000000, | |
| "cumulative_training_bytes": 12005386, | |
| "metrics": { | |
| "loss": 0.8784949809940438, | |
| "ce_loss": 0.868494984444799, | |
| "lb_loss": 0.999999989882045 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 13000000, | |
| "cumulative_training_bytes": 13001269, | |
| "metrics": { | |
| "loss": 0.8592479796569771, | |
| "ce_loss": 0.849247983573954, | |
| "lb_loss": 0.999999989954668 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 14000000, | |
| "cumulative_training_bytes": 14005280, | |
| "metrics": { | |
| "loss": 0.8378439935604906, | |
| "ce_loss": 0.8278439978801969, | |
| "lb_loss": 0.9999999899245978 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 15000000, | |
| "cumulative_training_bytes": 15001797, | |
| "metrics": { | |
| "loss": 0.8179623213681307, | |
| "ce_loss": 0.8079623260342186, | |
| "lb_loss": 0.9999999895889742 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 16000000, | |
| "cumulative_training_bytes": 16003308, | |
| "metrics": { | |
| "loss": 0.7999628585397256, | |
| "ce_loss": 0.7899628635112494, | |
| "lb_loss": 0.999999989471463 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 17000000, | |
| "cumulative_training_bytes": 17001780, | |
| "metrics": { | |
| "loss": 0.783798369592028, | |
| "ce_loss": 0.773798374831005, | |
| "lb_loss": 0.9999999887720858 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 18000000, | |
| "cumulative_training_bytes": 18002585, | |
| "metrics": { | |
| "loss": 0.7691971354788922, | |
| "ce_loss": 0.7591971409574468, | |
| "lb_loss": 0.9999999888399814 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 19000000, | |
| "cumulative_training_bytes": 19004388, | |
| "metrics": { | |
| "loss": 0.7562685100266358, | |
| "ce_loss": 0.746268515719468, | |
| "lb_loss": 0.9999999887325359 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 20000000, | |
| "cumulative_training_bytes": 20001795, | |
| "metrics": { | |
| "loss": 0.7443181650561906, | |
| "ce_loss": 0.7343181709418071, | |
| "lb_loss": 0.9999999887043265 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 21000000, | |
| "cumulative_training_bytes": 21006219, | |
| "metrics": { | |
| "loss": 0.7334088699425653, | |
| "ce_loss": 0.723408876002552, | |
| "lb_loss": 0.9999999888743791 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 22000000, | |
| "cumulative_training_bytes": 22003647, | |
| "metrics": { | |
| "loss": 0.7233542565306926, | |
| "ce_loss": 0.7133542627479986, | |
| "lb_loss": 0.9999999891080966 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 23000000, | |
| "cumulative_training_bytes": 23000855, | |
| "metrics": { | |
| "loss": 0.7141935865044633, | |
| "ce_loss": 0.7041935928654679, | |
| "lb_loss": 0.9999999891627919 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 24000000, | |
| "cumulative_training_bytes": 24007583, | |
| "metrics": { | |
| "loss": 0.7056202586567953, | |
| "ce_loss": 0.6956202651515152, | |
| "lb_loss": 0.9999999891818045 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 25000000, | |
| "cumulative_training_bytes": 25004319, | |
| "metrics": { | |
| "loss": 0.6978230217149393, | |
| "ce_loss": 0.687823028330781, | |
| "lb_loss": 0.9999999895577774 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 26000000, | |
| "cumulative_training_bytes": 26000600, | |
| "metrics": { | |
| "loss": 0.6906206210337261, | |
| "ce_loss": 0.6806206277614139, | |
| "lb_loss": 0.9999999897293911 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 27000000, | |
| "cumulative_training_bytes": 27007515, | |
| "metrics": { | |
| "loss": 0.6838098439610576, | |
| "ce_loss": 0.6738098507938758, | |
| "lb_loss": 0.9999999897926835 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 28000000, | |
| "cumulative_training_bytes": 28003023, | |
| "metrics": { | |
| "loss": 0.6774992880874688, | |
| "ce_loss": 0.6674992950164069, | |
| "lb_loss": 0.9999999895687797 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 29000000, | |
| "cumulative_training_bytes": 29003935, | |
| "metrics": { | |
| "loss": 0.6715684946638226, | |
| "ce_loss": 0.6615685016829461, | |
| "lb_loss": 0.9999999895046732 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 30000000, | |
| "cumulative_training_bytes": 30001066, | |
| "metrics": { | |
| "loss": 0.6660281601701846, | |
| "ce_loss": 0.6560281672728433, | |
| "lb_loss": 0.9999999894573715 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 31000000, | |
| "cumulative_training_bytes": 31004436, | |
| "metrics": { | |
| "loss": 0.6609612641201458, | |
| "ce_loss": 0.6509612713015559, | |
| "lb_loss": 0.9999999894746058 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 32000000, | |
| "cumulative_training_bytes": 32006649, | |
| "metrics": { | |
| "loss": 0.6561554203763533, | |
| "ce_loss": 0.646155427631579, | |
| "lb_loss": 0.9999999895050194 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 33000000, | |
| "cumulative_training_bytes": 33004203, | |
| "metrics": { | |
| "loss": 0.6516305574961438, | |
| "ce_loss": 0.6416305648201857, | |
| "lb_loss": 0.9999999895588151 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 34000000, | |
| "cumulative_training_bytes": 34006104, | |
| "metrics": { | |
| "loss": 0.6472530922785559, | |
| "ce_loss": 0.6372530996678676, | |
| "lb_loss": 0.9999999896520646 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 35000000, | |
| "cumulative_training_bytes": 35005618, | |
| "metrics": { | |
| "loss": 0.6431124474281974, | |
| "ce_loss": 0.6331124548785824, | |
| "lb_loss": 0.9999999896725271 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 36000000, | |
| "cumulative_training_bytes": 36002823, | |
| "metrics": { | |
| "loss": 0.6391829455870056, | |
| "ce_loss": 0.6291829530950862, | |
| "lb_loss": 0.9999999896918579 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 37000000, | |
| "cumulative_training_bytes": 37006427, | |
| "metrics": { | |
| "loss": 0.6354130913090232, | |
| "ce_loss": 0.6254130988721026, | |
| "lb_loss": 0.9999999896752716 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 38000000, | |
| "cumulative_training_bytes": 38005922, | |
| "metrics": { | |
| "loss": 0.6318843585695924, | |
| "ce_loss": 0.6218843661847673, | |
| "lb_loss": 0.9999999897196099 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 39000000, | |
| "cumulative_training_bytes": 39004443, | |
| "metrics": { | |
| "loss": 0.6285198655931632, | |
| "ce_loss": 0.6185198732577543, | |
| "lb_loss": 0.9999999895276488 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 40000000, | |
| "cumulative_training_bytes": 40005613, | |
| "metrics": { | |
| "loss": 0.6254313996155814, | |
| "ce_loss": 0.615431407326761, | |
| "lb_loss": 0.9999999897083863 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 41000000, | |
| "cumulative_training_bytes": 41003596, | |
| "metrics": { | |
| "loss": 0.6224746753085582, | |
| "ce_loss": 0.6124746830640643, | |
| "lb_loss": 0.9999999896242941 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 42000000, | |
| "cumulative_training_bytes": 42004130, | |
| "metrics": { | |
| "loss": 0.619576180100767, | |
| "ce_loss": 0.609576187898815, | |
| "lb_loss": 0.9999999894482935 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 43000000, | |
| "cumulative_training_bytes": 43002856, | |
| "metrics": { | |
| "loss": 0.6168661168497852, | |
| "ce_loss": 0.6068661246883903, | |
| "lb_loss": 0.9999999894715442 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 44000000, | |
| "cumulative_training_bytes": 44000615, | |
| "metrics": { | |
| "loss": 0.6142508432585481, | |
| "ce_loss": 0.6042508511355725, | |
| "lb_loss": 0.9999999894192938 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 45000000, | |
| "cumulative_training_bytes": 45002728, | |
| "metrics": { | |
| "loss": 0.6117183565789184, | |
| "ce_loss": 0.6017183644929386, | |
| "lb_loss": 0.9999999893305962 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 46000000, | |
| "cumulative_training_bytes": 46000713, | |
| "metrics": { | |
| "loss": 0.6093004826243594, | |
| "ce_loss": 0.5993004905734975, | |
| "lb_loss": 0.9999999892538988 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 47000000, | |
| "cumulative_training_bytes": 47001586, | |
| "metrics": { | |
| "loss": 0.6069603338424916, | |
| "ce_loss": 0.5969603418255132, | |
| "lb_loss": 0.999999989075395 | |
| } | |
| }, | |
| { | |
| "epoch": 1, | |
| "checkpoint_type": "epoch", | |
| "metrics": { | |
| "loss": 0.6054869050538325, | |
| "ce_loss": 0.5954869130583226, | |
| "lb_loss": 0.9999999890922734, | |
| "training_bytes": 47653409 | |
| }, | |
| "cumulative_training_bytes": 47653409, | |
| "training_bytes_this_epoch": 47653409 | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 48000000, | |
| "cumulative_training_bytes": 48006676, | |
| "metrics": { | |
| "loss": 0.49496941981108294, | |
| "ce_loss": 0.4849694293478261, | |
| "lb_loss": 0.9999999935212343 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 49000000, | |
| "cumulative_training_bytes": 49000759, | |
| "metrics": { | |
| "loss": 0.49630592086098413, | |
| "ce_loss": 0.4863059303977273, | |
| "lb_loss": 0.9999999932267449 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 50000000, | |
| "cumulative_training_bytes": 50005240, | |
| "metrics": { | |
| "loss": 0.4959718451049506, | |
| "ce_loss": 0.4859718546416938, | |
| "lb_loss": 0.9999999914573148 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 51000000, | |
| "cumulative_training_bytes": 51007539, | |
| "metrics": { | |
| "loss": 0.49752317824864495, | |
| "ce_loss": 0.4875231877853881, | |
| "lb_loss": 0.9999999910184781 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 52000000, | |
| "cumulative_training_bytes": 52002554, | |
| "metrics": { | |
| "loss": 0.4988107849174822, | |
| "ce_loss": 0.4888107944542254, | |
| "lb_loss": 0.9999999891914112 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 53000000, | |
| "cumulative_training_bytes": 53005306, | |
| "metrics": { | |
| "loss": 0.49884286868214095, | |
| "ce_loss": 0.4888428782188841, | |
| "lb_loss": 0.9999999886589159 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 54000000, | |
| "cumulative_training_bytes": 54000123, | |
| "metrics": { | |
| "loss": 0.49843673654287085, | |
| "ce_loss": 0.488436746079614, | |
| "lb_loss": 0.9999999882803895 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 55000000, | |
| "cumulative_training_bytes": 55003152, | |
| "metrics": { | |
| "loss": 0.4980025132497152, | |
| "ce_loss": 0.48800252278645834, | |
| "lb_loss": 0.9999999890724818 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 56000000, | |
| "cumulative_training_bytes": 56002937, | |
| "metrics": { | |
| "loss": 0.4978086235979956, | |
| "ce_loss": 0.48780863313473877, | |
| "lb_loss": 0.9999999890733924 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 57000000, | |
| "cumulative_training_bytes": 57004703, | |
| "metrics": { | |
| "loss": 0.4975252436342879, | |
| "ce_loss": 0.48752525317103107, | |
| "lb_loss": 0.9999999889765551 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 58000000, | |
| "cumulative_training_bytes": 58002959, | |
| "metrics": { | |
| "loss": 0.49715732681680713, | |
| "ce_loss": 0.4871573363535503, | |
| "lb_loss": 0.9999999886698271 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 59000000, | |
| "cumulative_training_bytes": 59000108, | |
| "metrics": { | |
| "loss": 0.4970432515893526, | |
| "ce_loss": 0.48704326112609575, | |
| "lb_loss": 0.9999999883443378 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 60000000, | |
| "cumulative_training_bytes": 60007478, | |
| "metrics": { | |
| "loss": 0.4969303793951454, | |
| "ce_loss": 0.48693038893188856, | |
| "lb_loss": 0.9999999884481401 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 61000000, | |
| "cumulative_training_bytes": 61002660, | |
| "metrics": { | |
| "loss": 0.49673105242600757, | |
| "ce_loss": 0.48673106196275073, | |
| "lb_loss": 0.9999999883864875 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 62000000, | |
| "cumulative_training_bytes": 62003465, | |
| "metrics": { | |
| "loss": 0.49654987219300095, | |
| "ce_loss": 0.4865498817297441, | |
| "lb_loss": 0.9999999883713753 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 63000000, | |
| "cumulative_training_bytes": 63000868, | |
| "metrics": { | |
| "loss": 0.4964099013555799, | |
| "ce_loss": 0.48640991089232305, | |
| "lb_loss": 0.9999999887089905 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 64000000, | |
| "cumulative_training_bytes": 64003546, | |
| "metrics": { | |
| "loss": 0.49635096437528303, | |
| "ce_loss": 0.4863509739120262, | |
| "lb_loss": 0.9999999889827633 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 65000000, | |
| "cumulative_training_bytes": 65001846, | |
| "metrics": { | |
| "loss": 0.4962221452934289, | |
| "ce_loss": 0.48622215483017206, | |
| "lb_loss": 0.9999999886680185 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 66000000, | |
| "cumulative_training_bytes": 66004938, | |
| "metrics": { | |
| "loss": 0.4961587034532485, | |
| "ce_loss": 0.48615871298999164, | |
| "lb_loss": 0.9999999882679765 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 67000000, | |
| "cumulative_training_bytes": 67000216, | |
| "metrics": { | |
| "loss": 0.49601907669743406, | |
| "ce_loss": 0.4860190862341772, | |
| "lb_loss": 0.9999999884704623 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 68000000, | |
| "cumulative_training_bytes": 68000224, | |
| "metrics": { | |
| "loss": 0.4964207015242049, | |
| "ce_loss": 0.4864207110609481, | |
| "lb_loss": 0.9999999881822244 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 69000000, | |
| "cumulative_training_bytes": 69005372, | |
| "metrics": { | |
| "loss": 0.49684213258408866, | |
| "ce_loss": 0.4868421421208318, | |
| "lb_loss": 0.9999999881602821 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 70000000, | |
| "cumulative_training_bytes": 70001864, | |
| "metrics": { | |
| "loss": 0.497037369488608, | |
| "ce_loss": 0.48703737902535116, | |
| "lb_loss": 0.9999999881770848 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 71000000, | |
| "cumulative_training_bytes": 71000907, | |
| "metrics": { | |
| "loss": 0.49706029712117744, | |
| "ce_loss": 0.4870603066579206, | |
| "lb_loss": 0.9999999880360634 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 72000000, | |
| "cumulative_training_bytes": 72005398, | |
| "metrics": { | |
| "loss": 0.49712042088778513, | |
| "ce_loss": 0.4871204304245283, | |
| "lb_loss": 0.9999999880790711 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 73000000, | |
| "cumulative_training_bytes": 73003962, | |
| "metrics": { | |
| "loss": 0.49715716096929913, | |
| "ce_loss": 0.4871571705060423, | |
| "lb_loss": 0.9999999879890338 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 74000000, | |
| "cumulative_training_bytes": 74006324, | |
| "metrics": { | |
| "loss": 0.4971806565123705, | |
| "ce_loss": 0.48718066604911364, | |
| "lb_loss": 0.9999999879612822 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 75000000, | |
| "cumulative_training_bytes": 75002178, | |
| "metrics": { | |
| "loss": 0.4972360369138309, | |
| "ce_loss": 0.48723604645057406, | |
| "lb_loss": 0.999999987898805 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 76000000, | |
| "cumulative_training_bytes": 76006119, | |
| "metrics": { | |
| "loss": 0.49723345379388895, | |
| "ce_loss": 0.4872334633306321, | |
| "lb_loss": 0.9999999879728066 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 77000000, | |
| "cumulative_training_bytes": 77005284, | |
| "metrics": { | |
| "loss": 0.4972499007815454, | |
| "ce_loss": 0.48724991031828857, | |
| "lb_loss": 0.9999999881039516 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 78000000, | |
| "cumulative_training_bytes": 78007177, | |
| "metrics": { | |
| "loss": 0.4972263361683527, | |
| "ce_loss": 0.4872263457050959, | |
| "lb_loss": 0.9999999881362097 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 79000000, | |
| "cumulative_training_bytes": 79001491, | |
| "metrics": { | |
| "loss": 0.4971963830499691, | |
| "ce_loss": 0.48719639258671227, | |
| "lb_loss": 0.9999999881780725 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 80000000, | |
| "cumulative_training_bytes": 80002957, | |
| "metrics": { | |
| "loss": 0.49715744238633375, | |
| "ce_loss": 0.4871574519230769, | |
| "lb_loss": 0.9999999881778243 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 81000000, | |
| "cumulative_training_bytes": 81002131, | |
| "metrics": { | |
| "loss": 0.4970846991314543, | |
| "ce_loss": 0.4870847086681975, | |
| "lb_loss": 0.9999999881201305 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 82000000, | |
| "cumulative_training_bytes": 82000379, | |
| "metrics": { | |
| "loss": 0.497049108552869, | |
| "ce_loss": 0.48704911808961215, | |
| "lb_loss": 0.9999999881481625 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 83000000, | |
| "cumulative_training_bytes": 83002326, | |
| "metrics": { | |
| "loss": 0.49690102084670573, | |
| "ce_loss": 0.4869010303834489, | |
| "lb_loss": 0.9999999881849545 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 84000000, | |
| "cumulative_training_bytes": 84004823, | |
| "metrics": { | |
| "loss": 0.4968436548828903, | |
| "ce_loss": 0.48684366441963345, | |
| "lb_loss": 0.9999999882473252 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 85000000, | |
| "cumulative_training_bytes": 85001132, | |
| "metrics": { | |
| "loss": 0.496751819840948, | |
| "ce_loss": 0.4867518293776912, | |
| "lb_loss": 0.9999999883161697 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 86000000, | |
| "cumulative_training_bytes": 86000628, | |
| "metrics": { | |
| "loss": 0.4967399565175699, | |
| "ce_loss": 0.4867399660543131, | |
| "lb_loss": 0.9999999883718574 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 87000000, | |
| "cumulative_training_bytes": 87000672, | |
| "metrics": { | |
| "loss": 0.49681193101589355, | |
| "ce_loss": 0.4868119405526367, | |
| "lb_loss": 0.9999999883783122 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 88000000, | |
| "cumulative_training_bytes": 88002075, | |
| "metrics": { | |
| "loss": 0.49670176321425324, | |
| "ce_loss": 0.4867017727509964, | |
| "lb_loss": 0.9999999882917427 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 89000000, | |
| "cumulative_training_bytes": 89004728, | |
| "metrics": { | |
| "loss": 0.49663121152807166, | |
| "ce_loss": 0.4866312210648148, | |
| "lb_loss": 0.9999999883770943 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 90000000, | |
| "cumulative_training_bytes": 90003725, | |
| "metrics": { | |
| "loss": 0.49656294246108723, | |
| "ce_loss": 0.4865629519978304, | |
| "lb_loss": 0.999999988555391 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 91000000, | |
| "cumulative_training_bytes": 91002611, | |
| "metrics": { | |
| "loss": 0.4965044176845958, | |
| "ce_loss": 0.48650442722133896, | |
| "lb_loss": 0.9999999886813296 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 92000000, | |
| "cumulative_training_bytes": 92003164, | |
| "metrics": { | |
| "loss": 0.4964984069213024, | |
| "ce_loss": 0.4864984164580456, | |
| "lb_loss": 0.9999999888961651 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 93000000, | |
| "cumulative_training_bytes": 93001402, | |
| "metrics": { | |
| "loss": 0.49645113397473944, | |
| "ce_loss": 0.4864511435114826, | |
| "lb_loss": 0.999999989119787 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 94000000, | |
| "cumulative_training_bytes": 94007638, | |
| "metrics": { | |
| "loss": 0.4963942520052126, | |
| "ce_loss": 0.48639426154195575, | |
| "lb_loss": 0.9999999891207247 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 95000000, | |
| "cumulative_training_bytes": 95004271, | |
| "metrics": { | |
| "loss": 0.4963107445261611, | |
| "ce_loss": 0.48631075406290425, | |
| "lb_loss": 0.9999999891373812 | |
| } | |
| }, | |
| { | |
| "epoch": 2, | |
| "checkpoint_type": "epoch", | |
| "metrics": { | |
| "loss": 0.4962876345627106, | |
| "ce_loss": 0.48628764409945374, | |
| "lb_loss": 0.999999989168886, | |
| "training_bytes": 47653416 | |
| }, | |
| "cumulative_training_bytes": 95306825, | |
| "training_bytes_this_epoch": 47653416 | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 96000000, | |
| "cumulative_training_bytes": 96003218, | |
| "metrics": { | |
| "loss": 0.49025411134237773, | |
| "ce_loss": 0.4802541208791209, | |
| "lb_loss": 0.9999999908300546 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 97000000, | |
| "cumulative_training_bytes": 97000816, | |
| "metrics": { | |
| "loss": 0.4910255136533021, | |
| "ce_loss": 0.48102552319004527, | |
| "lb_loss": 0.9999999905603504 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 98000000, | |
| "cumulative_training_bytes": 98005358, | |
| "metrics": { | |
| "loss": 0.49233333855107553, | |
| "ce_loss": 0.4823333480878187, | |
| "lb_loss": 0.9999999910508607 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 99000000, | |
| "cumulative_training_bytes": 99000141, | |
| "metrics": { | |
| "loss": 0.4918436110636709, | |
| "ce_loss": 0.4818436206004141, | |
| "lb_loss": 0.999999992102076 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 100000000, | |
| "cumulative_training_bytes": 100005926, | |
| "metrics": { | |
| "loss": 0.4912067290626054, | |
| "ce_loss": 0.48120673859934854, | |
| "lb_loss": 0.9999999912631629 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 101000000, | |
| "cumulative_training_bytes": 101001458, | |
| "metrics": { | |
| "loss": 0.4909990244014289, | |
| "ce_loss": 0.48099903393817206, | |
| "lb_loss": 0.999999990947144 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 102000000, | |
| "cumulative_training_bytes": 102004630, | |
| "metrics": { | |
| "loss": 0.49028549532595705, | |
| "ce_loss": 0.4802855048627002, | |
| "lb_loss": 0.9999999912707156 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 103000000, | |
| "cumulative_training_bytes": 103004382, | |
| "metrics": { | |
| "loss": 0.490558137229426, | |
| "ce_loss": 0.48055814676616915, | |
| "lb_loss": 0.99999999092586 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 104000000, | |
| "cumulative_training_bytes": 104002283, | |
| "metrics": { | |
| "loss": 0.49042572008880747, | |
| "ce_loss": 0.48042572962555063, | |
| "lb_loss": 0.9999999908623717 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 105000000, | |
| "cumulative_training_bytes": 105006513, | |
| "metrics": { | |
| "loss": 0.49059360480816605, | |
| "ce_loss": 0.4805936143449092, | |
| "lb_loss": 0.9999999903559967 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 106000000, | |
| "cumulative_training_bytes": 106006613, | |
| "metrics": { | |
| "loss": 0.4903415147116462, | |
| "ce_loss": 0.4803415242483894, | |
| "lb_loss": 0.9999999906561079 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 107000000, | |
| "cumulative_training_bytes": 107005607, | |
| "metrics": { | |
| "loss": 0.4906465298378475, | |
| "ce_loss": 0.4806465393745907, | |
| "lb_loss": 0.9999999903976801 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 108000000, | |
| "cumulative_training_bytes": 108001197, | |
| "metrics": { | |
| "loss": 0.4906608704421343, | |
| "ce_loss": 0.48066087997887746, | |
| "lb_loss": 0.9999999902877164 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 109000000, | |
| "cumulative_training_bytes": 109001691, | |
| "metrics": { | |
| "loss": 0.49069485728372664, | |
| "ce_loss": 0.4806948668204698, | |
| "lb_loss": 0.9999999900325566 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 110000000, | |
| "cumulative_training_bytes": 110007304, | |
| "metrics": { | |
| "loss": 0.4906437990875403, | |
| "ce_loss": 0.48064380862428346, | |
| "lb_loss": 0.9999999899985953 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 111000000, | |
| "cumulative_training_bytes": 111006246, | |
| "metrics": { | |
| "loss": 0.49070311546325684, | |
| "ce_loss": 0.480703125, | |
| "lb_loss": 0.9999999900562008 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 112000000, | |
| "cumulative_training_bytes": 112006808, | |
| "metrics": { | |
| "loss": 0.4907320227878786, | |
| "ce_loss": 0.48073203232462175, | |
| "lb_loss": 0.9999999894783181 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 113000000, | |
| "cumulative_training_bytes": 113006280, | |
| "metrics": { | |
| "loss": 0.4907356900739835, | |
| "ce_loss": 0.48073569961072665, | |
| "lb_loss": 0.999999989610436 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 114000000, | |
| "cumulative_training_bytes": 114000244, | |
| "metrics": { | |
| "loss": 0.4906710912515451, | |
| "ce_loss": 0.4806711007882883, | |
| "lb_loss": 0.9999999897974031 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 115000000, | |
| "cumulative_training_bytes": 115000090, | |
| "metrics": { | |
| "loss": 0.49064408903496304, | |
| "ce_loss": 0.4806440985717062, | |
| "lb_loss": 0.9999999897608811 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 116000000, | |
| "cumulative_training_bytes": 116003964, | |
| "metrics": { | |
| "loss": 0.4908688999492036, | |
| "ce_loss": 0.48086890948594674, | |
| "lb_loss": 0.9999999897499409 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 117000000, | |
| "cumulative_training_bytes": 117001141, | |
| "metrics": { | |
| "loss": 0.49077886969755463, | |
| "ce_loss": 0.4807788792342978, | |
| "lb_loss": 0.9999999896522636 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 118000000, | |
| "cumulative_training_bytes": 118002964, | |
| "metrics": { | |
| "loss": 0.49081061967910844, | |
| "ce_loss": 0.4808106292158516, | |
| "lb_loss": 0.9999999897073936 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 119000000, | |
| "cumulative_training_bytes": 119004829, | |
| "metrics": { | |
| "loss": 0.49074190038735244, | |
| "ce_loss": 0.4807419099240956, | |
| "lb_loss": 0.9999999899118753 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 120000000, | |
| "cumulative_training_bytes": 120005174, | |
| "metrics": { | |
| "loss": 0.49069510202198013, | |
| "ce_loss": 0.4806951115587233, | |
| "lb_loss": 0.999999989785755 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 121000000, | |
| "cumulative_training_bytes": 121000398, | |
| "metrics": { | |
| "loss": 0.4906328099449369, | |
| "ce_loss": 0.4806328194816801, | |
| "lb_loss": 0.9999999898084403 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 122000000, | |
| "cumulative_training_bytes": 122005153, | |
| "metrics": { | |
| "loss": 0.4905734521533371, | |
| "ce_loss": 0.48057346169008025, | |
| "lb_loss": 0.9999999895931111 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 123000000, | |
| "cumulative_training_bytes": 123002062, | |
| "metrics": { | |
| "loss": 0.49056105234136627, | |
| "ce_loss": 0.48056106187810943, | |
| "lb_loss": 0.9999999894398626 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 124000000, | |
| "cumulative_training_bytes": 124006089, | |
| "metrics": { | |
| "loss": 0.4904723872690717, | |
| "ce_loss": 0.4804723968058149, | |
| "lb_loss": 0.9999999896498737 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 125000000, | |
| "cumulative_training_bytes": 125006477, | |
| "metrics": { | |
| "loss": 0.4903383307249222, | |
| "ce_loss": 0.4803383402616654, | |
| "lb_loss": 0.9999999898584517 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 126000000, | |
| "cumulative_training_bytes": 126002630, | |
| "metrics": { | |
| "loss": 0.49058030584739254, | |
| "ce_loss": 0.4805803153841357, | |
| "lb_loss": 0.9999999897561486 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 127000000, | |
| "cumulative_training_bytes": 127007067, | |
| "metrics": { | |
| "loss": 0.49066594004055153, | |
| "ce_loss": 0.4806659495772947, | |
| "lb_loss": 0.9999999898067419 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 128000000, | |
| "cumulative_training_bytes": 128000583, | |
| "metrics": { | |
| "loss": 0.49058034760611396, | |
| "ce_loss": 0.48058035714285713, | |
| "lb_loss": 0.999999989768102 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 129000000, | |
| "cumulative_training_bytes": 129007289, | |
| "metrics": { | |
| "loss": 0.4905069065050655, | |
| "ce_loss": 0.4805069160418087, | |
| "lb_loss": 0.9999999897476218 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 130000000, | |
| "cumulative_training_bytes": 130006166, | |
| "metrics": { | |
| "loss": 0.49045753542133275, | |
| "ce_loss": 0.4804575449580759, | |
| "lb_loss": 0.9999999899782128 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 131000000, | |
| "cumulative_training_bytes": 131001304, | |
| "metrics": { | |
| "loss": 0.4904289406187695, | |
| "ce_loss": 0.4804289501555127, | |
| "lb_loss": 0.9999999901426038 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 132000000, | |
| "cumulative_training_bytes": 132007108, | |
| "metrics": { | |
| "loss": 0.4903701265992885, | |
| "ce_loss": 0.4803701361360317, | |
| "lb_loss": 0.9999999899394623 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 133000000, | |
| "cumulative_training_bytes": 133003089, | |
| "metrics": { | |
| "loss": 0.49030012820954977, | |
| "ce_loss": 0.48030013774629293, | |
| "lb_loss": 0.9999999899266576 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 134000000, | |
| "cumulative_training_bytes": 134000170, | |
| "metrics": { | |
| "loss": 0.49024726003084046, | |
| "ce_loss": 0.4802472695675836, | |
| "lb_loss": 0.999999989902716 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 135000000, | |
| "cumulative_training_bytes": 135007268, | |
| "metrics": { | |
| "loss": 0.4902310506127265, | |
| "ce_loss": 0.48023106014946965, | |
| "lb_loss": 0.999999989883879 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 136000000, | |
| "cumulative_training_bytes": 136002367, | |
| "metrics": { | |
| "loss": 0.49015822482355786, | |
| "ce_loss": 0.48015823436030103, | |
| "lb_loss": 0.9999999898845927 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 137000000, | |
| "cumulative_training_bytes": 137002293, | |
| "metrics": { | |
| "loss": 0.49018864670178053, | |
| "ce_loss": 0.4801886562385237, | |
| "lb_loss": 0.9999999900512997 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 138000000, | |
| "cumulative_training_bytes": 138004174, | |
| "metrics": { | |
| "loss": 0.4901451457887006, | |
| "ce_loss": 0.4801451553254438, | |
| "lb_loss": 0.9999999901139867 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 139000000, | |
| "cumulative_training_bytes": 139006240, | |
| "metrics": { | |
| "loss": 0.4903390567955974, | |
| "ce_loss": 0.4803390663323406, | |
| "lb_loss": 0.999999990163354 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 140000000, | |
| "cumulative_training_bytes": 140006436, | |
| "metrics": { | |
| "loss": 0.49048212032282185, | |
| "ce_loss": 0.480482129859565, | |
| "lb_loss": 0.9999999901594661 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 141000000, | |
| "cumulative_training_bytes": 141007445, | |
| "metrics": { | |
| "loss": 0.4905080058343408, | |
| "ce_loss": 0.48050801537108395, | |
| "lb_loss": 0.9999999901041711 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 142000000, | |
| "cumulative_training_bytes": 142004918, | |
| "metrics": { | |
| "loss": 0.4905039665249063, | |
| "ce_loss": 0.48050397606164946, | |
| "lb_loss": 0.9999999901685075 | |
| } | |
| }, | |
| { | |
| "epoch": 3, | |
| "checkpoint_type": "epoch", | |
| "metrics": { | |
| "loss": 0.49051486986155374, | |
| "ce_loss": 0.4805148793982969, | |
| "lb_loss": 0.9999999901265442, | |
| "training_bytes": 47653391 | |
| }, | |
| "cumulative_training_bytes": 142960216, | |
| "training_bytes_this_epoch": 47653391 | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 143000000, | |
| "cumulative_training_bytes": 143005202, | |
| "metrics": { | |
| "loss": 0.4950260321299235, | |
| "ce_loss": 0.4850260416666667, | |
| "lb_loss": 0.9999999701976776 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 144000000, | |
| "cumulative_training_bytes": 144006005, | |
| "metrics": { | |
| "loss": 0.4904259713026729, | |
| "ce_loss": 0.48042598083941607, | |
| "lb_loss": 0.9999999908635216 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 145000000, | |
| "cumulative_training_bytes": 145001749, | |
| "metrics": { | |
| "loss": 0.4900371510437812, | |
| "ce_loss": 0.48003716058052437, | |
| "lb_loss": 0.9999999908472268 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 146000000, | |
| "cumulative_training_bytes": 146005280, | |
| "metrics": { | |
| "loss": 0.4904491602627556, | |
| "ce_loss": 0.48044916979949875, | |
| "lb_loss": 0.9999999887961194 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 147000000, | |
| "cumulative_training_bytes": 147006364, | |
| "metrics": { | |
| "loss": 0.49022183598212477, | |
| "ce_loss": 0.48022184551886793, | |
| "lb_loss": 0.9999999902158413 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 148000000, | |
| "cumulative_training_bytes": 148004606, | |
| "metrics": { | |
| "loss": 0.4898206580768932, | |
| "ce_loss": 0.47982066761363634, | |
| "lb_loss": 0.9999999900658926 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 149000000, | |
| "cumulative_training_bytes": 149001684, | |
| "metrics": { | |
| "loss": 0.48951690106452267, | |
| "ce_loss": 0.47951691060126583, | |
| "lb_loss": 0.9999999901161918 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 150000000, | |
| "cumulative_training_bytes": 150003252, | |
| "metrics": { | |
| "loss": 0.4902524334599995, | |
| "ce_loss": 0.4802524429967427, | |
| "lb_loss": 0.9999999897099473 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 151000000, | |
| "cumulative_training_bytes": 151004021, | |
| "metrics": { | |
| "loss": 0.4901546794499362, | |
| "ce_loss": 0.48015468898667935, | |
| "lb_loss": 0.9999999898484954 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 152000000, | |
| "cumulative_training_bytes": 152003583, | |
| "metrics": { | |
| "loss": 0.4901396364200731, | |
| "ce_loss": 0.48013964595681624, | |
| "lb_loss": 0.9999999896032542 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 153000000, | |
| "cumulative_training_bytes": 153004258, | |
| "metrics": { | |
| "loss": 0.49013379143505564, | |
| "ce_loss": 0.4801338009717988, | |
| "lb_loss": 0.9999999890058506 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 154000000, | |
| "cumulative_training_bytes": 154004288, | |
| "metrics": { | |
| "loss": 0.4900680994376158, | |
| "ce_loss": 0.480068108974359, | |
| "lb_loss": 0.999999989632179 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 155000000, | |
| "cumulative_training_bytes": 155004149, | |
| "metrics": { | |
| "loss": 0.4901411515178947, | |
| "ce_loss": 0.4801411610546379, | |
| "lb_loss": 0.9999999897755691 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 156000000, | |
| "cumulative_training_bytes": 156001930, | |
| "metrics": { | |
| "loss": 0.4899712896123179, | |
| "ce_loss": 0.47997129914906106, | |
| "lb_loss": 0.9999999894012868 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 157000000, | |
| "cumulative_training_bytes": 157005966, | |
| "metrics": { | |
| "loss": 0.4899014294959544, | |
| "ce_loss": 0.47990143903269755, | |
| "lb_loss": 0.9999999894758012 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 158000000, | |
| "cumulative_training_bytes": 158006659, | |
| "metrics": { | |
| "loss": 0.48980809543528125, | |
| "ce_loss": 0.4798081049720244, | |
| "lb_loss": 0.9999999895403854 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 159000000, | |
| "cumulative_training_bytes": 159001028, | |
| "metrics": { | |
| "loss": 0.4895588359286506, | |
| "ce_loss": 0.4795588454653938, | |
| "lb_loss": 0.9999999895585181 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 160000000, | |
| "cumulative_training_bytes": 160001860, | |
| "metrics": { | |
| "loss": 0.4894983198657726, | |
| "ce_loss": 0.47949832940251574, | |
| "lb_loss": 0.9999999894232549 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 161000000, | |
| "cumulative_training_bytes": 161000396, | |
| "metrics": { | |
| "loss": 0.4892045148159733, | |
| "ce_loss": 0.4792045243527165, | |
| "lb_loss": 0.9999999891972906 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 162000000, | |
| "cumulative_training_bytes": 162002358, | |
| "metrics": { | |
| "loss": 0.4891760811347486, | |
| "ce_loss": 0.47917609067149175, | |
| "lb_loss": 0.9999999890233505 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 163000000, | |
| "cumulative_training_bytes": 163000910, | |
| "metrics": { | |
| "loss": 0.4890335630177085, | |
| "ce_loss": 0.47903357255445167, | |
| "lb_loss": 0.9999999890675471 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 164000000, | |
| "cumulative_training_bytes": 164005597, | |
| "metrics": { | |
| "loss": 0.48890226029586237, | |
| "ce_loss": 0.47890226983260553, | |
| "lb_loss": 0.9999999888729321 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 165000000, | |
| "cumulative_training_bytes": 165002975, | |
| "metrics": { | |
| "loss": 0.4889194060730553, | |
| "ce_loss": 0.47891941560979845, | |
| "lb_loss": 0.9999999890234671 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 166000000, | |
| "cumulative_training_bytes": 166007294, | |
| "metrics": { | |
| "loss": 0.48903683825322025, | |
| "ce_loss": 0.4790368477899634, | |
| "lb_loss": 0.9999999888872696 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 167000000, | |
| "cumulative_training_bytes": 167001945, | |
| "metrics": { | |
| "loss": 0.4890494737780068, | |
| "ce_loss": 0.47904948331474995, | |
| "lb_loss": 0.9999999891006479 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 168000000, | |
| "cumulative_training_bytes": 168005336, | |
| "metrics": { | |
| "loss": 0.48906435342565363, | |
| "ce_loss": 0.4790643629623968, | |
| "lb_loss": 0.9999999890849336 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 169000000, | |
| "cumulative_training_bytes": 169002071, | |
| "metrics": { | |
| "loss": 0.48898078195840533, | |
| "ce_loss": 0.4789807914951485, | |
| "lb_loss": 0.9999999892392673 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 170000000, | |
| "cumulative_training_bytes": 170002507, | |
| "metrics": { | |
| "loss": 0.48883532836328514, | |
| "ce_loss": 0.4788353379000283, | |
| "lb_loss": 0.9999999893484761 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 171000000, | |
| "cumulative_training_bytes": 171005319, | |
| "metrics": { | |
| "loss": 0.48872788846981063, | |
| "ce_loss": 0.4787278980065538, | |
| "lb_loss": 0.9999999894365335 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 172000000, | |
| "cumulative_training_bytes": 172007475, | |
| "metrics": { | |
| "loss": 0.4886464073825819, | |
| "ce_loss": 0.4786464169193251, | |
| "lb_loss": 0.999999989424222 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 173000000, | |
| "cumulative_training_bytes": 173006995, | |
| "metrics": { | |
| "loss": 0.48865697313400097, | |
| "ce_loss": 0.47865698267074414, | |
| "lb_loss": 0.9999999893671633 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 174000000, | |
| "cumulative_training_bytes": 174002372, | |
| "metrics": { | |
| "loss": 0.48858499138826916, | |
| "ce_loss": 0.4785850009250123, | |
| "lb_loss": 0.9999999893993713 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 175000000, | |
| "cumulative_training_bytes": 175000872, | |
| "metrics": { | |
| "loss": 0.48849087463510193, | |
| "ce_loss": 0.4784908841718451, | |
| "lb_loss": 0.9999999894580696 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 176000000, | |
| "cumulative_training_bytes": 176007018, | |
| "metrics": { | |
| "loss": 0.4885006819310511, | |
| "ce_loss": 0.4785006914677943, | |
| "lb_loss": 0.9999999893523677 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 177000000, | |
| "cumulative_training_bytes": 177003062, | |
| "metrics": { | |
| "loss": 0.4884071085188124, | |
| "ce_loss": 0.4784071180555556, | |
| "lb_loss": 0.9999999894492003 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 178000000, | |
| "cumulative_training_bytes": 178005739, | |
| "metrics": { | |
| "loss": 0.4883760760542553, | |
| "ce_loss": 0.4783760855909985, | |
| "lb_loss": 0.9999999893214313 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 179000000, | |
| "cumulative_training_bytes": 179002039, | |
| "metrics": { | |
| "loss": 0.48841644468038026, | |
| "ce_loss": 0.4784164542171234, | |
| "lb_loss": 0.9999999892871193 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 180000000, | |
| "cumulative_training_bytes": 180001975, | |
| "metrics": { | |
| "loss": 0.4885168265783871, | |
| "ce_loss": 0.47851683611513024, | |
| "lb_loss": 0.9999999893307933 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 181000000, | |
| "cumulative_training_bytes": 181002156, | |
| "metrics": { | |
| "loss": 0.4885435228641423, | |
| "ce_loss": 0.4785435324008855, | |
| "lb_loss": 0.9999999895041126 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 182000000, | |
| "cumulative_training_bytes": 182006789, | |
| "metrics": { | |
| "loss": 0.48842715038972745, | |
| "ce_loss": 0.4784271599264706, | |
| "lb_loss": 0.9999999895516564 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 183000000, | |
| "cumulative_training_bytes": 183001003, | |
| "metrics": { | |
| "loss": 0.4883744527003505, | |
| "ce_loss": 0.4783744622370937, | |
| "lb_loss": 0.9999999895720363 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 184000000, | |
| "cumulative_training_bytes": 184002846, | |
| "metrics": { | |
| "loss": 0.4883268971737586, | |
| "ce_loss": 0.47832690671050176, | |
| "lb_loss": 0.9999999894599509 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 185000000, | |
| "cumulative_training_bytes": 185004724, | |
| "metrics": { | |
| "loss": 0.4882663181899502, | |
| "ce_loss": 0.47826632772669336, | |
| "lb_loss": 0.9999999894400365 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 186000000, | |
| "cumulative_training_bytes": 186007260, | |
| "metrics": { | |
| "loss": 0.48828150444600626, | |
| "ce_loss": 0.47828151398274943, | |
| "lb_loss": 0.9999999894740508 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 187000000, | |
| "cumulative_training_bytes": 187007019, | |
| "metrics": { | |
| "loss": 0.4882290801773191, | |
| "ce_loss": 0.47822908971406225, | |
| "lb_loss": 0.9999999894010861 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 188000000, | |
| "cumulative_training_bytes": 188003736, | |
| "metrics": { | |
| "loss": 0.488216156216274, | |
| "ce_loss": 0.4782161657530172, | |
| "lb_loss": 0.9999999894326629 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 189000000, | |
| "cumulative_training_bytes": 189007403, | |
| "metrics": { | |
| "loss": 0.4881525303701408, | |
| "ce_loss": 0.47815253990688394, | |
| "lb_loss": 0.9999999895538252 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 190000000, | |
| "cumulative_training_bytes": 190003337, | |
| "metrics": { | |
| "loss": 0.4880743821461995, | |
| "ce_loss": 0.4780743916829427, | |
| "lb_loss": 0.9999999895905299 | |
| } | |
| }, | |
| { | |
| "epoch": 4, | |
| "checkpoint_type": "epoch", | |
| "metrics": { | |
| "loss": 0.488021058104645, | |
| "ce_loss": 0.4780210676413882, | |
| "lb_loss": 0.9999999895423727, | |
| "training_bytes": 47653398 | |
| }, | |
| "cumulative_training_bytes": 190613614, | |
| "training_bytes_this_epoch": 47653398 | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 191000000, | |
| "cumulative_training_bytes": 191004295, | |
| "metrics": { | |
| "loss": 0.48361365467894313, | |
| "ce_loss": 0.4736136642156863, | |
| "lb_loss": 0.9999999906502518 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 192000000, | |
| "cumulative_training_bytes": 192003486, | |
| "metrics": { | |
| "loss": 0.4822246106290027, | |
| "ce_loss": 0.47222462016574585, | |
| "lb_loss": 0.9999999911086994 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 193000000, | |
| "cumulative_training_bytes": 193000756, | |
| "metrics": { | |
| "loss": 0.48206590686197065, | |
| "ce_loss": 0.4720659163987138, | |
| "lb_loss": 0.9999999875424376 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 194000000, | |
| "cumulative_training_bytes": 194006438, | |
| "metrics": { | |
| "loss": 0.4826827534723066, | |
| "ce_loss": 0.47268276300904977, | |
| "lb_loss": 0.9999999888072726 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 195000000, | |
| "cumulative_training_bytes": 195005382, | |
| "metrics": { | |
| "loss": 0.48297037944927085, | |
| "ce_loss": 0.472970388986014, | |
| "lb_loss": 0.9999999882249565 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 196000000, | |
| "cumulative_training_bytes": 196002015, | |
| "metrics": { | |
| "loss": 0.4832766776071315, | |
| "ce_loss": 0.47327668714387466, | |
| "lb_loss": 0.999999988792289 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 197000000, | |
| "cumulative_training_bytes": 197006361, | |
| "metrics": { | |
| "loss": 0.48392085377260935, | |
| "ce_loss": 0.4739208633093525, | |
| "lb_loss": 0.9999999878503721 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 198000000, | |
| "cumulative_training_bytes": 198003880, | |
| "metrics": { | |
| "loss": 0.483928608201846, | |
| "ce_loss": 0.4739286177385892, | |
| "lb_loss": 0.99999998751023 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 199000000, | |
| "cumulative_training_bytes": 199006196, | |
| "metrics": { | |
| "loss": 0.48404037288334817, | |
| "ce_loss": 0.47404038242009133, | |
| "lb_loss": 0.9999999879702041 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 200000000, | |
| "cumulative_training_bytes": 200002073, | |
| "metrics": { | |
| "loss": 0.4839540720959099, | |
| "ce_loss": 0.47395408163265307, | |
| "lb_loss": 0.999999988760267 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 201000000, | |
| "cumulative_training_bytes": 201002611, | |
| "metrics": { | |
| "loss": 0.4842147074617819, | |
| "ce_loss": 0.4742147169985251, | |
| "lb_loss": 0.9999999887032495 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 202000000, | |
| "cumulative_training_bytes": 202000755, | |
| "metrics": { | |
| "loss": 0.48400739288586786, | |
| "ce_loss": 0.474007402422611, | |
| "lb_loss": 0.9999999884480903 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 203000000, | |
| "cumulative_training_bytes": 203001562, | |
| "metrics": { | |
| "loss": 0.4841745324391381, | |
| "ce_loss": 0.47417454197588127, | |
| "lb_loss": 0.9999999887573181 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 204000000, | |
| "cumulative_training_bytes": 204005682, | |
| "metrics": { | |
| "loss": 0.48423728844666647, | |
| "ce_loss": 0.47423729798340963, | |
| "lb_loss": 0.9999999889179008 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 205000000, | |
| "cumulative_training_bytes": 205003502, | |
| "metrics": { | |
| "loss": 0.484211044443555, | |
| "ce_loss": 0.47421105398029817, | |
| "lb_loss": 0.9999999888598348 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 206000000, | |
| "cumulative_training_bytes": 206008019, | |
| "metrics": { | |
| "loss": 0.48419132477137033, | |
| "ce_loss": 0.4741913343081135, | |
| "lb_loss": 0.9999999889632016 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 207000000, | |
| "cumulative_training_bytes": 207007717, | |
| "metrics": { | |
| "loss": 0.4842689376011073, | |
| "ce_loss": 0.47426894713785045, | |
| "lb_loss": 0.9999999892488818 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 208000000, | |
| "cumulative_training_bytes": 208005208, | |
| "metrics": { | |
| "loss": 0.4841757200888075, | |
| "ce_loss": 0.47417572962555066, | |
| "lb_loss": 0.999999989313176 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 209000000, | |
| "cumulative_training_bytes": 209005329, | |
| "metrics": { | |
| "loss": 0.4841251532236735, | |
| "ce_loss": 0.47412516276041666, | |
| "lb_loss": 0.9999999895443519 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 210000000, | |
| "cumulative_training_bytes": 210006121, | |
| "metrics": { | |
| "loss": 0.4841085443945093, | |
| "ce_loss": 0.47410855393125245, | |
| "lb_loss": 0.9999999895909708 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 211000000, | |
| "cumulative_training_bytes": 211003532, | |
| "metrics": { | |
| "loss": 0.48414475511940114, | |
| "ce_loss": 0.4741447646561443, | |
| "lb_loss": 0.9999999894947094 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 212000000, | |
| "cumulative_training_bytes": 212007723, | |
| "metrics": { | |
| "loss": 0.48415256366347176, | |
| "ce_loss": 0.4741525732002149, | |
| "lb_loss": 0.9999999894966027 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 213000000, | |
| "cumulative_training_bytes": 213005205, | |
| "metrics": { | |
| "loss": 0.48423584617656507, | |
| "ce_loss": 0.47423585571330823, | |
| "lb_loss": 0.9999999894371515 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 214000000, | |
| "cumulative_training_bytes": 214007542, | |
| "metrics": { | |
| "loss": 0.4842972747625365, | |
| "ce_loss": 0.47429728429927964, | |
| "lb_loss": 0.9999999894023176 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 215000000, | |
| "cumulative_training_bytes": 215006636, | |
| "metrics": { | |
| "loss": 0.48420266889447544, | |
| "ce_loss": 0.4742026784312186, | |
| "lb_loss": 0.9999999897414117 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 216000000, | |
| "cumulative_training_bytes": 216002411, | |
| "metrics": { | |
| "loss": 0.48442725453235763, | |
| "ce_loss": 0.4744272640691008, | |
| "lb_loss": 0.9999999896582165 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 217000000, | |
| "cumulative_training_bytes": 217003351, | |
| "metrics": { | |
| "loss": 0.48465302021652246, | |
| "ce_loss": 0.4746530297532656, | |
| "lb_loss": 0.9999999896708351 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 218000000, | |
| "cumulative_training_bytes": 218001934, | |
| "metrics": { | |
| "loss": 0.48466454465906106, | |
| "ce_loss": 0.4746645541958042, | |
| "lb_loss": 0.9999999894795718 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 219000000, | |
| "cumulative_training_bytes": 219001498, | |
| "metrics": { | |
| "loss": 0.48470003685327034, | |
| "ce_loss": 0.4747000463900135, | |
| "lb_loss": 0.9999999894626067 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 220000000, | |
| "cumulative_training_bytes": 220000802, | |
| "metrics": { | |
| "loss": 0.48471733482440416, | |
| "ce_loss": 0.4747173443611473, | |
| "lb_loss": 0.9999999896022145 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 221000000, | |
| "cumulative_training_bytes": 221002631, | |
| "metrics": { | |
| "loss": 0.4847172157847394, | |
| "ce_loss": 0.4747172253214826, | |
| "lb_loss": 0.9999999895699386 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 222000000, | |
| "cumulative_training_bytes": 222001546, | |
| "metrics": { | |
| "loss": 0.48476429971392804, | |
| "ce_loss": 0.4747643092506712, | |
| "lb_loss": 0.9999999897434038 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 223000000, | |
| "cumulative_training_bytes": 223002095, | |
| "metrics": { | |
| "loss": 0.4846939223298289, | |
| "ce_loss": 0.47469393186657205, | |
| "lb_loss": 0.9999999896217131 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 224000000, | |
| "cumulative_training_bytes": 224004704, | |
| "metrics": { | |
| "loss": 0.4846698684175937, | |
| "ce_loss": 0.47466987795433685, | |
| "lb_loss": 0.9999999897695562 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 225000000, | |
| "cumulative_training_bytes": 225002022, | |
| "metrics": { | |
| "loss": 0.48464635646704474, | |
| "ce_loss": 0.4746463660037879, | |
| "lb_loss": 0.9999999898666792 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 226000000, | |
| "cumulative_training_bytes": 226003031, | |
| "metrics": { | |
| "loss": 0.4846968945550516, | |
| "ce_loss": 0.47469690409179477, | |
| "lb_loss": 0.999999989960508 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 227000000, | |
| "cumulative_training_bytes": 227007213, | |
| "metrics": { | |
| "loss": 0.48466569770009893, | |
| "ce_loss": 0.4746657072368421, | |
| "lb_loss": 0.9999999899738713 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 228000000, | |
| "cumulative_training_bytes": 228002618, | |
| "metrics": { | |
| "loss": 0.4847170054192085, | |
| "ce_loss": 0.47471701495595164, | |
| "lb_loss": 0.9999999898766133 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 229000000, | |
| "cumulative_training_bytes": 229002817, | |
| "metrics": { | |
| "loss": 0.4849277419846056, | |
| "ce_loss": 0.47492775152134875, | |
| "lb_loss": 0.9999999898439013 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 230000000, | |
| "cumulative_training_bytes": 230004657, | |
| "metrics": { | |
| "loss": 0.48519230282958, | |
| "ce_loss": 0.47519231236632314, | |
| "lb_loss": 0.9999999897664978 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 231000000, | |
| "cumulative_training_bytes": 231006924, | |
| "metrics": { | |
| "loss": 0.4853118831206326, | |
| "ce_loss": 0.4753118926573758, | |
| "lb_loss": 0.9999999898737654 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 232000000, | |
| "cumulative_training_bytes": 232007018, | |
| "metrics": { | |
| "loss": 0.4854375916427203, | |
| "ce_loss": 0.4754376011794635, | |
| "lb_loss": 0.9999999898104178 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 233000000, | |
| "cumulative_training_bytes": 233006236, | |
| "metrics": { | |
| "loss": 0.48551042782778, | |
| "ce_loss": 0.47551043736452314, | |
| "lb_loss": 0.9999999898362022 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 234000000, | |
| "cumulative_training_bytes": 234000486, | |
| "metrics": { | |
| "loss": 0.4855104365451488, | |
| "ce_loss": 0.47551044608189197, | |
| "lb_loss": 0.9999999898169262 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 235000000, | |
| "cumulative_training_bytes": 235002824, | |
| "metrics": { | |
| "loss": 0.485555099787045, | |
| "ce_loss": 0.47555510932378814, | |
| "lb_loss": 0.9999999898311206 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 236000000, | |
| "cumulative_training_bytes": 236004788, | |
| "metrics": { | |
| "loss": 0.48557505530384387, | |
| "ce_loss": 0.47557506484058704, | |
| "lb_loss": 0.9999999897843591 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 237000000, | |
| "cumulative_training_bytes": 237001532, | |
| "metrics": { | |
| "loss": 0.4854937203452311, | |
| "ce_loss": 0.47549372988197425, | |
| "lb_loss": 0.999999989826477 | |
| } | |
| }, | |
| { | |
| "checkpoint_type": "bytes", | |
| "bytes_threshold": 238000000, | |
| "cumulative_training_bytes": 238004993, | |
| "metrics": { | |
| "loss": 0.485468689970447, | |
| "ce_loss": 0.4754686995071902, | |
| "lb_loss": 0.9999999898203087 | |
| } | |
| }, | |
| { | |
| "epoch": 5, | |
| "checkpoint_type": "epoch", | |
| "metrics": { | |
| "loss": 0.48547107517566046, | |
| "ce_loss": 0.4754710847124036, | |
| "lb_loss": 0.9999999897626342, | |
| "training_bytes": 47653400 | |
| }, | |
| "cumulative_training_bytes": 238267014, | |
| "training_bytes_this_epoch": 47653400 | |
| } | |
| ] | |
| } |