PI1M-2stg / metadata.json
jordiferrero's picture
Add files using upload-large-folder tool
6bce6da verified
{
"run_name": "run_large_20260115_191350",
"timestamp": "20260115_191350",
"phase": "large",
"config": {
"arch_layout": [
"m4",
[
"T1m4",
[
"T22"
],
"m4T1"
],
"m4"
],
"d_model": [
1024,
1024,
1536
],
"d_intermediate": [
0,
2816,
4096
],
"vocab_size": 256,
"ssm_cfg": {
"chunk_size": 256,
"d_conv": 4,
"d_state": 128,
"expand": 2
},
"attn_cfg": {
"num_heads": [
16,
16,
16
],
"rotary_emb_dim": [
32,
32,
48
],
"window_size": [
1023,
1023,
-1
]
},
"tie_embeddings": false
},
"training_args": {
"data": "datasets/PI1M/PI1M_v2.csv",
"max_samples": null,
"batch_size": 16,
"epochs": 5,
"lr": 0.0001,
"weight_decay": 0.1,
"gradient_accumulation": 8,
"concatenate": true,
"num_concatenate": 10,
"concatenate_separator": " ",
"checkpoint_bytes": 1000000,
"num_test_samples": 5,
"num_visualize": 5,
"skip_visualization": false
},
"dataset_info": {
"train_size": 99574,
"test_size": 5,
"test_smiles_file": "checkpoints/run_large_20260115_191350/test_smiles.txt"
},
"model_info": {
"num_parameters": 622923776,
"device": "cuda",
"dtype": "torch.bfloat16",
"use_amp": true
},
"training_history": [
{
"checkpoint_type": "bytes",
"bytes_threshold": 1000000,
"cumulative_training_bytes": 1000166,
"metrics": {
"loss": 3.0352404484382043,
"ce_loss": 3.0252403846153846,
"lb_loss": 0.9999999889960656
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 2000000,
"cumulative_training_bytes": 2000240,
"metrics": {
"loss": 2.107340772335346,
"ce_loss": 2.097340745192308,
"lb_loss": 0.9999999871620765
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 3000000,
"cumulative_training_bytes": 3001794,
"metrics": {
"loss": 1.7094185730380476,
"ce_loss": 1.6994185581841432,
"lb_loss": 0.9999999873473516
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 4000000,
"cumulative_training_bytes": 4002359,
"metrics": {
"loss": 1.47650072853762,
"ce_loss": 1.4665007197696738,
"lb_loss": 0.9999999890171863
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 5000000,
"cumulative_training_bytes": 5005670,
"metrics": {
"loss": 1.3171558716545808,
"ce_loss": 1.3071558665644172,
"lb_loss": 0.9999999897611653
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 6000000,
"cumulative_training_bytes": 6001321,
"metrics": {
"loss": 1.2017559169808312,
"ce_loss": 1.1917559143222507,
"lb_loss": 0.9999999908535072
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 7000000,
"cumulative_training_bytes": 7001673,
"metrics": {
"loss": 1.1151093587948484,
"ce_loss": 1.1051093578860898,
"lb_loss": 0.9999999904684795
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 8000000,
"cumulative_training_bytes": 8004669,
"metrics": {
"loss": 1.0468063034773787,
"ce_loss": 1.0368063038793103,
"lb_loss": 0.9999999897804297
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 9000000,
"cumulative_training_bytes": 9006752,
"metrics": {
"loss": 0.9919913549626127,
"ce_loss": 0.9819913563829787,
"lb_loss": 0.9999999897023465
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 10000000,
"cumulative_training_bytes": 10007281,
"metrics": {
"loss": 0.9471440684010387,
"ce_loss": 0.9371440706355283,
"lb_loss": 0.9999999893660932
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 11000000,
"cumulative_training_bytes": 11001365,
"metrics": {
"loss": 0.9100927569407938,
"ce_loss": 0.900092759836351,
"lb_loss": 0.999999989540132
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 12000000,
"cumulative_training_bytes": 12005386,
"metrics": {
"loss": 0.8784949809940438,
"ce_loss": 0.868494984444799,
"lb_loss": 0.999999989882045
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 13000000,
"cumulative_training_bytes": 13001269,
"metrics": {
"loss": 0.8592479796569771,
"ce_loss": 0.849247983573954,
"lb_loss": 0.999999989954668
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 14000000,
"cumulative_training_bytes": 14005280,
"metrics": {
"loss": 0.8378439935604906,
"ce_loss": 0.8278439978801969,
"lb_loss": 0.9999999899245978
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 15000000,
"cumulative_training_bytes": 15001797,
"metrics": {
"loss": 0.8179623213681307,
"ce_loss": 0.8079623260342186,
"lb_loss": 0.9999999895889742
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 16000000,
"cumulative_training_bytes": 16003308,
"metrics": {
"loss": 0.7999628585397256,
"ce_loss": 0.7899628635112494,
"lb_loss": 0.999999989471463
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 17000000,
"cumulative_training_bytes": 17001780,
"metrics": {
"loss": 0.783798369592028,
"ce_loss": 0.773798374831005,
"lb_loss": 0.9999999887720858
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 18000000,
"cumulative_training_bytes": 18002585,
"metrics": {
"loss": 0.7691971354788922,
"ce_loss": 0.7591971409574468,
"lb_loss": 0.9999999888399814
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 19000000,
"cumulative_training_bytes": 19004388,
"metrics": {
"loss": 0.7562685100266358,
"ce_loss": 0.746268515719468,
"lb_loss": 0.9999999887325359
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 20000000,
"cumulative_training_bytes": 20001795,
"metrics": {
"loss": 0.7443181650561906,
"ce_loss": 0.7343181709418071,
"lb_loss": 0.9999999887043265
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 21000000,
"cumulative_training_bytes": 21006219,
"metrics": {
"loss": 0.7334088699425653,
"ce_loss": 0.723408876002552,
"lb_loss": 0.9999999888743791
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 22000000,
"cumulative_training_bytes": 22003647,
"metrics": {
"loss": 0.7233542565306926,
"ce_loss": 0.7133542627479986,
"lb_loss": 0.9999999891080966
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 23000000,
"cumulative_training_bytes": 23000855,
"metrics": {
"loss": 0.7141935865044633,
"ce_loss": 0.7041935928654679,
"lb_loss": 0.9999999891627919
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 24000000,
"cumulative_training_bytes": 24007583,
"metrics": {
"loss": 0.7056202586567953,
"ce_loss": 0.6956202651515152,
"lb_loss": 0.9999999891818045
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 25000000,
"cumulative_training_bytes": 25004319,
"metrics": {
"loss": 0.6978230217149393,
"ce_loss": 0.687823028330781,
"lb_loss": 0.9999999895577774
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 26000000,
"cumulative_training_bytes": 26000600,
"metrics": {
"loss": 0.6906206210337261,
"ce_loss": 0.6806206277614139,
"lb_loss": 0.9999999897293911
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 27000000,
"cumulative_training_bytes": 27007515,
"metrics": {
"loss": 0.6838098439610576,
"ce_loss": 0.6738098507938758,
"lb_loss": 0.9999999897926835
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 28000000,
"cumulative_training_bytes": 28003023,
"metrics": {
"loss": 0.6774992880874688,
"ce_loss": 0.6674992950164069,
"lb_loss": 0.9999999895687797
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 29000000,
"cumulative_training_bytes": 29003935,
"metrics": {
"loss": 0.6715684946638226,
"ce_loss": 0.6615685016829461,
"lb_loss": 0.9999999895046732
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 30000000,
"cumulative_training_bytes": 30001066,
"metrics": {
"loss": 0.6660281601701846,
"ce_loss": 0.6560281672728433,
"lb_loss": 0.9999999894573715
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 31000000,
"cumulative_training_bytes": 31004436,
"metrics": {
"loss": 0.6609612641201458,
"ce_loss": 0.6509612713015559,
"lb_loss": 0.9999999894746058
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 32000000,
"cumulative_training_bytes": 32006649,
"metrics": {
"loss": 0.6561554203763533,
"ce_loss": 0.646155427631579,
"lb_loss": 0.9999999895050194
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 33000000,
"cumulative_training_bytes": 33004203,
"metrics": {
"loss": 0.6516305574961438,
"ce_loss": 0.6416305648201857,
"lb_loss": 0.9999999895588151
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 34000000,
"cumulative_training_bytes": 34006104,
"metrics": {
"loss": 0.6472530922785559,
"ce_loss": 0.6372530996678676,
"lb_loss": 0.9999999896520646
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 35000000,
"cumulative_training_bytes": 35005618,
"metrics": {
"loss": 0.6431124474281974,
"ce_loss": 0.6331124548785824,
"lb_loss": 0.9999999896725271
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 36000000,
"cumulative_training_bytes": 36002823,
"metrics": {
"loss": 0.6391829455870056,
"ce_loss": 0.6291829530950862,
"lb_loss": 0.9999999896918579
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 37000000,
"cumulative_training_bytes": 37006427,
"metrics": {
"loss": 0.6354130913090232,
"ce_loss": 0.6254130988721026,
"lb_loss": 0.9999999896752716
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 38000000,
"cumulative_training_bytes": 38005922,
"metrics": {
"loss": 0.6318843585695924,
"ce_loss": 0.6218843661847673,
"lb_loss": 0.9999999897196099
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 39000000,
"cumulative_training_bytes": 39004443,
"metrics": {
"loss": 0.6285198655931632,
"ce_loss": 0.6185198732577543,
"lb_loss": 0.9999999895276488
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 40000000,
"cumulative_training_bytes": 40005613,
"metrics": {
"loss": 0.6254313996155814,
"ce_loss": 0.615431407326761,
"lb_loss": 0.9999999897083863
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 41000000,
"cumulative_training_bytes": 41003596,
"metrics": {
"loss": 0.6224746753085582,
"ce_loss": 0.6124746830640643,
"lb_loss": 0.9999999896242941
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 42000000,
"cumulative_training_bytes": 42004130,
"metrics": {
"loss": 0.619576180100767,
"ce_loss": 0.609576187898815,
"lb_loss": 0.9999999894482935
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 43000000,
"cumulative_training_bytes": 43002856,
"metrics": {
"loss": 0.6168661168497852,
"ce_loss": 0.6068661246883903,
"lb_loss": 0.9999999894715442
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 44000000,
"cumulative_training_bytes": 44000615,
"metrics": {
"loss": 0.6142508432585481,
"ce_loss": 0.6042508511355725,
"lb_loss": 0.9999999894192938
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 45000000,
"cumulative_training_bytes": 45002728,
"metrics": {
"loss": 0.6117183565789184,
"ce_loss": 0.6017183644929386,
"lb_loss": 0.9999999893305962
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 46000000,
"cumulative_training_bytes": 46000713,
"metrics": {
"loss": 0.6093004826243594,
"ce_loss": 0.5993004905734975,
"lb_loss": 0.9999999892538988
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 47000000,
"cumulative_training_bytes": 47001586,
"metrics": {
"loss": 0.6069603338424916,
"ce_loss": 0.5969603418255132,
"lb_loss": 0.999999989075395
}
},
{
"epoch": 1,
"checkpoint_type": "epoch",
"metrics": {
"loss": 0.6054869050538325,
"ce_loss": 0.5954869130583226,
"lb_loss": 0.9999999890922734,
"training_bytes": 47653409
},
"cumulative_training_bytes": 47653409,
"training_bytes_this_epoch": 47653409
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 48000000,
"cumulative_training_bytes": 48006676,
"metrics": {
"loss": 0.49496941981108294,
"ce_loss": 0.4849694293478261,
"lb_loss": 0.9999999935212343
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 49000000,
"cumulative_training_bytes": 49000759,
"metrics": {
"loss": 0.49630592086098413,
"ce_loss": 0.4863059303977273,
"lb_loss": 0.9999999932267449
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 50000000,
"cumulative_training_bytes": 50005240,
"metrics": {
"loss": 0.4959718451049506,
"ce_loss": 0.4859718546416938,
"lb_loss": 0.9999999914573148
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 51000000,
"cumulative_training_bytes": 51007539,
"metrics": {
"loss": 0.49752317824864495,
"ce_loss": 0.4875231877853881,
"lb_loss": 0.9999999910184781
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 52000000,
"cumulative_training_bytes": 52002554,
"metrics": {
"loss": 0.4988107849174822,
"ce_loss": 0.4888107944542254,
"lb_loss": 0.9999999891914112
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 53000000,
"cumulative_training_bytes": 53005306,
"metrics": {
"loss": 0.49884286868214095,
"ce_loss": 0.4888428782188841,
"lb_loss": 0.9999999886589159
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 54000000,
"cumulative_training_bytes": 54000123,
"metrics": {
"loss": 0.49843673654287085,
"ce_loss": 0.488436746079614,
"lb_loss": 0.9999999882803895
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 55000000,
"cumulative_training_bytes": 55003152,
"metrics": {
"loss": 0.4980025132497152,
"ce_loss": 0.48800252278645834,
"lb_loss": 0.9999999890724818
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 56000000,
"cumulative_training_bytes": 56002937,
"metrics": {
"loss": 0.4978086235979956,
"ce_loss": 0.48780863313473877,
"lb_loss": 0.9999999890733924
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 57000000,
"cumulative_training_bytes": 57004703,
"metrics": {
"loss": 0.4975252436342879,
"ce_loss": 0.48752525317103107,
"lb_loss": 0.9999999889765551
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 58000000,
"cumulative_training_bytes": 58002959,
"metrics": {
"loss": 0.49715732681680713,
"ce_loss": 0.4871573363535503,
"lb_loss": 0.9999999886698271
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 59000000,
"cumulative_training_bytes": 59000108,
"metrics": {
"loss": 0.4970432515893526,
"ce_loss": 0.48704326112609575,
"lb_loss": 0.9999999883443378
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 60000000,
"cumulative_training_bytes": 60007478,
"metrics": {
"loss": 0.4969303793951454,
"ce_loss": 0.48693038893188856,
"lb_loss": 0.9999999884481401
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 61000000,
"cumulative_training_bytes": 61002660,
"metrics": {
"loss": 0.49673105242600757,
"ce_loss": 0.48673106196275073,
"lb_loss": 0.9999999883864875
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 62000000,
"cumulative_training_bytes": 62003465,
"metrics": {
"loss": 0.49654987219300095,
"ce_loss": 0.4865498817297441,
"lb_loss": 0.9999999883713753
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 63000000,
"cumulative_training_bytes": 63000868,
"metrics": {
"loss": 0.4964099013555799,
"ce_loss": 0.48640991089232305,
"lb_loss": 0.9999999887089905
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 64000000,
"cumulative_training_bytes": 64003546,
"metrics": {
"loss": 0.49635096437528303,
"ce_loss": 0.4863509739120262,
"lb_loss": 0.9999999889827633
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 65000000,
"cumulative_training_bytes": 65001846,
"metrics": {
"loss": 0.4962221452934289,
"ce_loss": 0.48622215483017206,
"lb_loss": 0.9999999886680185
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 66000000,
"cumulative_training_bytes": 66004938,
"metrics": {
"loss": 0.4961587034532485,
"ce_loss": 0.48615871298999164,
"lb_loss": 0.9999999882679765
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 67000000,
"cumulative_training_bytes": 67000216,
"metrics": {
"loss": 0.49601907669743406,
"ce_loss": 0.4860190862341772,
"lb_loss": 0.9999999884704623
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 68000000,
"cumulative_training_bytes": 68000224,
"metrics": {
"loss": 0.4964207015242049,
"ce_loss": 0.4864207110609481,
"lb_loss": 0.9999999881822244
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 69000000,
"cumulative_training_bytes": 69005372,
"metrics": {
"loss": 0.49684213258408866,
"ce_loss": 0.4868421421208318,
"lb_loss": 0.9999999881602821
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 70000000,
"cumulative_training_bytes": 70001864,
"metrics": {
"loss": 0.497037369488608,
"ce_loss": 0.48703737902535116,
"lb_loss": 0.9999999881770848
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 71000000,
"cumulative_training_bytes": 71000907,
"metrics": {
"loss": 0.49706029712117744,
"ce_loss": 0.4870603066579206,
"lb_loss": 0.9999999880360634
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 72000000,
"cumulative_training_bytes": 72005398,
"metrics": {
"loss": 0.49712042088778513,
"ce_loss": 0.4871204304245283,
"lb_loss": 0.9999999880790711
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 73000000,
"cumulative_training_bytes": 73003962,
"metrics": {
"loss": 0.49715716096929913,
"ce_loss": 0.4871571705060423,
"lb_loss": 0.9999999879890338
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 74000000,
"cumulative_training_bytes": 74006324,
"metrics": {
"loss": 0.4971806565123705,
"ce_loss": 0.48718066604911364,
"lb_loss": 0.9999999879612822
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 75000000,
"cumulative_training_bytes": 75002178,
"metrics": {
"loss": 0.4972360369138309,
"ce_loss": 0.48723604645057406,
"lb_loss": 0.999999987898805
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 76000000,
"cumulative_training_bytes": 76006119,
"metrics": {
"loss": 0.49723345379388895,
"ce_loss": 0.4872334633306321,
"lb_loss": 0.9999999879728066
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 77000000,
"cumulative_training_bytes": 77005284,
"metrics": {
"loss": 0.4972499007815454,
"ce_loss": 0.48724991031828857,
"lb_loss": 0.9999999881039516
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 78000000,
"cumulative_training_bytes": 78007177,
"metrics": {
"loss": 0.4972263361683527,
"ce_loss": 0.4872263457050959,
"lb_loss": 0.9999999881362097
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 79000000,
"cumulative_training_bytes": 79001491,
"metrics": {
"loss": 0.4971963830499691,
"ce_loss": 0.48719639258671227,
"lb_loss": 0.9999999881780725
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 80000000,
"cumulative_training_bytes": 80002957,
"metrics": {
"loss": 0.49715744238633375,
"ce_loss": 0.4871574519230769,
"lb_loss": 0.9999999881778243
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 81000000,
"cumulative_training_bytes": 81002131,
"metrics": {
"loss": 0.4970846991314543,
"ce_loss": 0.4870847086681975,
"lb_loss": 0.9999999881201305
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 82000000,
"cumulative_training_bytes": 82000379,
"metrics": {
"loss": 0.497049108552869,
"ce_loss": 0.48704911808961215,
"lb_loss": 0.9999999881481625
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 83000000,
"cumulative_training_bytes": 83002326,
"metrics": {
"loss": 0.49690102084670573,
"ce_loss": 0.4869010303834489,
"lb_loss": 0.9999999881849545
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 84000000,
"cumulative_training_bytes": 84004823,
"metrics": {
"loss": 0.4968436548828903,
"ce_loss": 0.48684366441963345,
"lb_loss": 0.9999999882473252
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 85000000,
"cumulative_training_bytes": 85001132,
"metrics": {
"loss": 0.496751819840948,
"ce_loss": 0.4867518293776912,
"lb_loss": 0.9999999883161697
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 86000000,
"cumulative_training_bytes": 86000628,
"metrics": {
"loss": 0.4967399565175699,
"ce_loss": 0.4867399660543131,
"lb_loss": 0.9999999883718574
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 87000000,
"cumulative_training_bytes": 87000672,
"metrics": {
"loss": 0.49681193101589355,
"ce_loss": 0.4868119405526367,
"lb_loss": 0.9999999883783122
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 88000000,
"cumulative_training_bytes": 88002075,
"metrics": {
"loss": 0.49670176321425324,
"ce_loss": 0.4867017727509964,
"lb_loss": 0.9999999882917427
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 89000000,
"cumulative_training_bytes": 89004728,
"metrics": {
"loss": 0.49663121152807166,
"ce_loss": 0.4866312210648148,
"lb_loss": 0.9999999883770943
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 90000000,
"cumulative_training_bytes": 90003725,
"metrics": {
"loss": 0.49656294246108723,
"ce_loss": 0.4865629519978304,
"lb_loss": 0.999999988555391
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 91000000,
"cumulative_training_bytes": 91002611,
"metrics": {
"loss": 0.4965044176845958,
"ce_loss": 0.48650442722133896,
"lb_loss": 0.9999999886813296
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 92000000,
"cumulative_training_bytes": 92003164,
"metrics": {
"loss": 0.4964984069213024,
"ce_loss": 0.4864984164580456,
"lb_loss": 0.9999999888961651
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 93000000,
"cumulative_training_bytes": 93001402,
"metrics": {
"loss": 0.49645113397473944,
"ce_loss": 0.4864511435114826,
"lb_loss": 0.999999989119787
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 94000000,
"cumulative_training_bytes": 94007638,
"metrics": {
"loss": 0.4963942520052126,
"ce_loss": 0.48639426154195575,
"lb_loss": 0.9999999891207247
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 95000000,
"cumulative_training_bytes": 95004271,
"metrics": {
"loss": 0.4963107445261611,
"ce_loss": 0.48631075406290425,
"lb_loss": 0.9999999891373812
}
},
{
"epoch": 2,
"checkpoint_type": "epoch",
"metrics": {
"loss": 0.4962876345627106,
"ce_loss": 0.48628764409945374,
"lb_loss": 0.999999989168886,
"training_bytes": 47653416
},
"cumulative_training_bytes": 95306825,
"training_bytes_this_epoch": 47653416
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 96000000,
"cumulative_training_bytes": 96003218,
"metrics": {
"loss": 0.49025411134237773,
"ce_loss": 0.4802541208791209,
"lb_loss": 0.9999999908300546
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 97000000,
"cumulative_training_bytes": 97000816,
"metrics": {
"loss": 0.4910255136533021,
"ce_loss": 0.48102552319004527,
"lb_loss": 0.9999999905603504
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 98000000,
"cumulative_training_bytes": 98005358,
"metrics": {
"loss": 0.49233333855107553,
"ce_loss": 0.4823333480878187,
"lb_loss": 0.9999999910508607
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 99000000,
"cumulative_training_bytes": 99000141,
"metrics": {
"loss": 0.4918436110636709,
"ce_loss": 0.4818436206004141,
"lb_loss": 0.999999992102076
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 100000000,
"cumulative_training_bytes": 100005926,
"metrics": {
"loss": 0.4912067290626054,
"ce_loss": 0.48120673859934854,
"lb_loss": 0.9999999912631629
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 101000000,
"cumulative_training_bytes": 101001458,
"metrics": {
"loss": 0.4909990244014289,
"ce_loss": 0.48099903393817206,
"lb_loss": 0.999999990947144
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 102000000,
"cumulative_training_bytes": 102004630,
"metrics": {
"loss": 0.49028549532595705,
"ce_loss": 0.4802855048627002,
"lb_loss": 0.9999999912707156
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 103000000,
"cumulative_training_bytes": 103004382,
"metrics": {
"loss": 0.490558137229426,
"ce_loss": 0.48055814676616915,
"lb_loss": 0.99999999092586
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 104000000,
"cumulative_training_bytes": 104002283,
"metrics": {
"loss": 0.49042572008880747,
"ce_loss": 0.48042572962555063,
"lb_loss": 0.9999999908623717
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 105000000,
"cumulative_training_bytes": 105006513,
"metrics": {
"loss": 0.49059360480816605,
"ce_loss": 0.4805936143449092,
"lb_loss": 0.9999999903559967
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 106000000,
"cumulative_training_bytes": 106006613,
"metrics": {
"loss": 0.4903415147116462,
"ce_loss": 0.4803415242483894,
"lb_loss": 0.9999999906561079
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 107000000,
"cumulative_training_bytes": 107005607,
"metrics": {
"loss": 0.4906465298378475,
"ce_loss": 0.4806465393745907,
"lb_loss": 0.9999999903976801
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 108000000,
"cumulative_training_bytes": 108001197,
"metrics": {
"loss": 0.4906608704421343,
"ce_loss": 0.48066087997887746,
"lb_loss": 0.9999999902877164
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 109000000,
"cumulative_training_bytes": 109001691,
"metrics": {
"loss": 0.49069485728372664,
"ce_loss": 0.4806948668204698,
"lb_loss": 0.9999999900325566
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 110000000,
"cumulative_training_bytes": 110007304,
"metrics": {
"loss": 0.4906437990875403,
"ce_loss": 0.48064380862428346,
"lb_loss": 0.9999999899985953
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 111000000,
"cumulative_training_bytes": 111006246,
"metrics": {
"loss": 0.49070311546325684,
"ce_loss": 0.480703125,
"lb_loss": 0.9999999900562008
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 112000000,
"cumulative_training_bytes": 112006808,
"metrics": {
"loss": 0.4907320227878786,
"ce_loss": 0.48073203232462175,
"lb_loss": 0.9999999894783181
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 113000000,
"cumulative_training_bytes": 113006280,
"metrics": {
"loss": 0.4907356900739835,
"ce_loss": 0.48073569961072665,
"lb_loss": 0.999999989610436
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 114000000,
"cumulative_training_bytes": 114000244,
"metrics": {
"loss": 0.4906710912515451,
"ce_loss": 0.4806711007882883,
"lb_loss": 0.9999999897974031
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 115000000,
"cumulative_training_bytes": 115000090,
"metrics": {
"loss": 0.49064408903496304,
"ce_loss": 0.4806440985717062,
"lb_loss": 0.9999999897608811
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 116000000,
"cumulative_training_bytes": 116003964,
"metrics": {
"loss": 0.4908688999492036,
"ce_loss": 0.48086890948594674,
"lb_loss": 0.9999999897499409
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 117000000,
"cumulative_training_bytes": 117001141,
"metrics": {
"loss": 0.49077886969755463,
"ce_loss": 0.4807788792342978,
"lb_loss": 0.9999999896522636
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 118000000,
"cumulative_training_bytes": 118002964,
"metrics": {
"loss": 0.49081061967910844,
"ce_loss": 0.4808106292158516,
"lb_loss": 0.9999999897073936
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 119000000,
"cumulative_training_bytes": 119004829,
"metrics": {
"loss": 0.49074190038735244,
"ce_loss": 0.4807419099240956,
"lb_loss": 0.9999999899118753
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 120000000,
"cumulative_training_bytes": 120005174,
"metrics": {
"loss": 0.49069510202198013,
"ce_loss": 0.4806951115587233,
"lb_loss": 0.999999989785755
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 121000000,
"cumulative_training_bytes": 121000398,
"metrics": {
"loss": 0.4906328099449369,
"ce_loss": 0.4806328194816801,
"lb_loss": 0.9999999898084403
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 122000000,
"cumulative_training_bytes": 122005153,
"metrics": {
"loss": 0.4905734521533371,
"ce_loss": 0.48057346169008025,
"lb_loss": 0.9999999895931111
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 123000000,
"cumulative_training_bytes": 123002062,
"metrics": {
"loss": 0.49056105234136627,
"ce_loss": 0.48056106187810943,
"lb_loss": 0.9999999894398626
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 124000000,
"cumulative_training_bytes": 124006089,
"metrics": {
"loss": 0.4904723872690717,
"ce_loss": 0.4804723968058149,
"lb_loss": 0.9999999896498737
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 125000000,
"cumulative_training_bytes": 125006477,
"metrics": {
"loss": 0.4903383307249222,
"ce_loss": 0.4803383402616654,
"lb_loss": 0.9999999898584517
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 126000000,
"cumulative_training_bytes": 126002630,
"metrics": {
"loss": 0.49058030584739254,
"ce_loss": 0.4805803153841357,
"lb_loss": 0.9999999897561486
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 127000000,
"cumulative_training_bytes": 127007067,
"metrics": {
"loss": 0.49066594004055153,
"ce_loss": 0.4806659495772947,
"lb_loss": 0.9999999898067419
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 128000000,
"cumulative_training_bytes": 128000583,
"metrics": {
"loss": 0.49058034760611396,
"ce_loss": 0.48058035714285713,
"lb_loss": 0.999999989768102
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 129000000,
"cumulative_training_bytes": 129007289,
"metrics": {
"loss": 0.4905069065050655,
"ce_loss": 0.4805069160418087,
"lb_loss": 0.9999999897476218
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 130000000,
"cumulative_training_bytes": 130006166,
"metrics": {
"loss": 0.49045753542133275,
"ce_loss": 0.4804575449580759,
"lb_loss": 0.9999999899782128
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 131000000,
"cumulative_training_bytes": 131001304,
"metrics": {
"loss": 0.4904289406187695,
"ce_loss": 0.4804289501555127,
"lb_loss": 0.9999999901426038
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 132000000,
"cumulative_training_bytes": 132007108,
"metrics": {
"loss": 0.4903701265992885,
"ce_loss": 0.4803701361360317,
"lb_loss": 0.9999999899394623
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 133000000,
"cumulative_training_bytes": 133003089,
"metrics": {
"loss": 0.49030012820954977,
"ce_loss": 0.48030013774629293,
"lb_loss": 0.9999999899266576
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 134000000,
"cumulative_training_bytes": 134000170,
"metrics": {
"loss": 0.49024726003084046,
"ce_loss": 0.4802472695675836,
"lb_loss": 0.999999989902716
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 135000000,
"cumulative_training_bytes": 135007268,
"metrics": {
"loss": 0.4902310506127265,
"ce_loss": 0.48023106014946965,
"lb_loss": 0.999999989883879
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 136000000,
"cumulative_training_bytes": 136002367,
"metrics": {
"loss": 0.49015822482355786,
"ce_loss": 0.48015823436030103,
"lb_loss": 0.9999999898845927
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 137000000,
"cumulative_training_bytes": 137002293,
"metrics": {
"loss": 0.49018864670178053,
"ce_loss": 0.4801886562385237,
"lb_loss": 0.9999999900512997
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 138000000,
"cumulative_training_bytes": 138004174,
"metrics": {
"loss": 0.4901451457887006,
"ce_loss": 0.4801451553254438,
"lb_loss": 0.9999999901139867
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 139000000,
"cumulative_training_bytes": 139006240,
"metrics": {
"loss": 0.4903390567955974,
"ce_loss": 0.4803390663323406,
"lb_loss": 0.999999990163354
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 140000000,
"cumulative_training_bytes": 140006436,
"metrics": {
"loss": 0.49048212032282185,
"ce_loss": 0.480482129859565,
"lb_loss": 0.9999999901594661
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 141000000,
"cumulative_training_bytes": 141007445,
"metrics": {
"loss": 0.4905080058343408,
"ce_loss": 0.48050801537108395,
"lb_loss": 0.9999999901041711
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 142000000,
"cumulative_training_bytes": 142004918,
"metrics": {
"loss": 0.4905039665249063,
"ce_loss": 0.48050397606164946,
"lb_loss": 0.9999999901685075
}
},
{
"epoch": 3,
"checkpoint_type": "epoch",
"metrics": {
"loss": 0.49051486986155374,
"ce_loss": 0.4805148793982969,
"lb_loss": 0.9999999901265442,
"training_bytes": 47653391
},
"cumulative_training_bytes": 142960216,
"training_bytes_this_epoch": 47653391
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 143000000,
"cumulative_training_bytes": 143005202,
"metrics": {
"loss": 0.4950260321299235,
"ce_loss": 0.4850260416666667,
"lb_loss": 0.9999999701976776
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 144000000,
"cumulative_training_bytes": 144006005,
"metrics": {
"loss": 0.4904259713026729,
"ce_loss": 0.48042598083941607,
"lb_loss": 0.9999999908635216
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 145000000,
"cumulative_training_bytes": 145001749,
"metrics": {
"loss": 0.4900371510437812,
"ce_loss": 0.48003716058052437,
"lb_loss": 0.9999999908472268
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 146000000,
"cumulative_training_bytes": 146005280,
"metrics": {
"loss": 0.4904491602627556,
"ce_loss": 0.48044916979949875,
"lb_loss": 0.9999999887961194
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 147000000,
"cumulative_training_bytes": 147006364,
"metrics": {
"loss": 0.49022183598212477,
"ce_loss": 0.48022184551886793,
"lb_loss": 0.9999999902158413
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 148000000,
"cumulative_training_bytes": 148004606,
"metrics": {
"loss": 0.4898206580768932,
"ce_loss": 0.47982066761363634,
"lb_loss": 0.9999999900658926
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 149000000,
"cumulative_training_bytes": 149001684,
"metrics": {
"loss": 0.48951690106452267,
"ce_loss": 0.47951691060126583,
"lb_loss": 0.9999999901161918
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 150000000,
"cumulative_training_bytes": 150003252,
"metrics": {
"loss": 0.4902524334599995,
"ce_loss": 0.4802524429967427,
"lb_loss": 0.9999999897099473
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 151000000,
"cumulative_training_bytes": 151004021,
"metrics": {
"loss": 0.4901546794499362,
"ce_loss": 0.48015468898667935,
"lb_loss": 0.9999999898484954
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 152000000,
"cumulative_training_bytes": 152003583,
"metrics": {
"loss": 0.4901396364200731,
"ce_loss": 0.48013964595681624,
"lb_loss": 0.9999999896032542
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 153000000,
"cumulative_training_bytes": 153004258,
"metrics": {
"loss": 0.49013379143505564,
"ce_loss": 0.4801338009717988,
"lb_loss": 0.9999999890058506
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 154000000,
"cumulative_training_bytes": 154004288,
"metrics": {
"loss": 0.4900680994376158,
"ce_loss": 0.480068108974359,
"lb_loss": 0.999999989632179
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 155000000,
"cumulative_training_bytes": 155004149,
"metrics": {
"loss": 0.4901411515178947,
"ce_loss": 0.4801411610546379,
"lb_loss": 0.9999999897755691
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 156000000,
"cumulative_training_bytes": 156001930,
"metrics": {
"loss": 0.4899712896123179,
"ce_loss": 0.47997129914906106,
"lb_loss": 0.9999999894012868
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 157000000,
"cumulative_training_bytes": 157005966,
"metrics": {
"loss": 0.4899014294959544,
"ce_loss": 0.47990143903269755,
"lb_loss": 0.9999999894758012
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 158000000,
"cumulative_training_bytes": 158006659,
"metrics": {
"loss": 0.48980809543528125,
"ce_loss": 0.4798081049720244,
"lb_loss": 0.9999999895403854
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 159000000,
"cumulative_training_bytes": 159001028,
"metrics": {
"loss": 0.4895588359286506,
"ce_loss": 0.4795588454653938,
"lb_loss": 0.9999999895585181
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 160000000,
"cumulative_training_bytes": 160001860,
"metrics": {
"loss": 0.4894983198657726,
"ce_loss": 0.47949832940251574,
"lb_loss": 0.9999999894232549
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 161000000,
"cumulative_training_bytes": 161000396,
"metrics": {
"loss": 0.4892045148159733,
"ce_loss": 0.4792045243527165,
"lb_loss": 0.9999999891972906
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 162000000,
"cumulative_training_bytes": 162002358,
"metrics": {
"loss": 0.4891760811347486,
"ce_loss": 0.47917609067149175,
"lb_loss": 0.9999999890233505
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 163000000,
"cumulative_training_bytes": 163000910,
"metrics": {
"loss": 0.4890335630177085,
"ce_loss": 0.47903357255445167,
"lb_loss": 0.9999999890675471
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 164000000,
"cumulative_training_bytes": 164005597,
"metrics": {
"loss": 0.48890226029586237,
"ce_loss": 0.47890226983260553,
"lb_loss": 0.9999999888729321
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 165000000,
"cumulative_training_bytes": 165002975,
"metrics": {
"loss": 0.4889194060730553,
"ce_loss": 0.47891941560979845,
"lb_loss": 0.9999999890234671
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 166000000,
"cumulative_training_bytes": 166007294,
"metrics": {
"loss": 0.48903683825322025,
"ce_loss": 0.4790368477899634,
"lb_loss": 0.9999999888872696
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 167000000,
"cumulative_training_bytes": 167001945,
"metrics": {
"loss": 0.4890494737780068,
"ce_loss": 0.47904948331474995,
"lb_loss": 0.9999999891006479
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 168000000,
"cumulative_training_bytes": 168005336,
"metrics": {
"loss": 0.48906435342565363,
"ce_loss": 0.4790643629623968,
"lb_loss": 0.9999999890849336
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 169000000,
"cumulative_training_bytes": 169002071,
"metrics": {
"loss": 0.48898078195840533,
"ce_loss": 0.4789807914951485,
"lb_loss": 0.9999999892392673
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 170000000,
"cumulative_training_bytes": 170002507,
"metrics": {
"loss": 0.48883532836328514,
"ce_loss": 0.4788353379000283,
"lb_loss": 0.9999999893484761
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 171000000,
"cumulative_training_bytes": 171005319,
"metrics": {
"loss": 0.48872788846981063,
"ce_loss": 0.4787278980065538,
"lb_loss": 0.9999999894365335
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 172000000,
"cumulative_training_bytes": 172007475,
"metrics": {
"loss": 0.4886464073825819,
"ce_loss": 0.4786464169193251,
"lb_loss": 0.999999989424222
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 173000000,
"cumulative_training_bytes": 173006995,
"metrics": {
"loss": 0.48865697313400097,
"ce_loss": 0.47865698267074414,
"lb_loss": 0.9999999893671633
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 174000000,
"cumulative_training_bytes": 174002372,
"metrics": {
"loss": 0.48858499138826916,
"ce_loss": 0.4785850009250123,
"lb_loss": 0.9999999893993713
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 175000000,
"cumulative_training_bytes": 175000872,
"metrics": {
"loss": 0.48849087463510193,
"ce_loss": 0.4784908841718451,
"lb_loss": 0.9999999894580696
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 176000000,
"cumulative_training_bytes": 176007018,
"metrics": {
"loss": 0.4885006819310511,
"ce_loss": 0.4785006914677943,
"lb_loss": 0.9999999893523677
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 177000000,
"cumulative_training_bytes": 177003062,
"metrics": {
"loss": 0.4884071085188124,
"ce_loss": 0.4784071180555556,
"lb_loss": 0.9999999894492003
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 178000000,
"cumulative_training_bytes": 178005739,
"metrics": {
"loss": 0.4883760760542553,
"ce_loss": 0.4783760855909985,
"lb_loss": 0.9999999893214313
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 179000000,
"cumulative_training_bytes": 179002039,
"metrics": {
"loss": 0.48841644468038026,
"ce_loss": 0.4784164542171234,
"lb_loss": 0.9999999892871193
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 180000000,
"cumulative_training_bytes": 180001975,
"metrics": {
"loss": 0.4885168265783871,
"ce_loss": 0.47851683611513024,
"lb_loss": 0.9999999893307933
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 181000000,
"cumulative_training_bytes": 181002156,
"metrics": {
"loss": 0.4885435228641423,
"ce_loss": 0.4785435324008855,
"lb_loss": 0.9999999895041126
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 182000000,
"cumulative_training_bytes": 182006789,
"metrics": {
"loss": 0.48842715038972745,
"ce_loss": 0.4784271599264706,
"lb_loss": 0.9999999895516564
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 183000000,
"cumulative_training_bytes": 183001003,
"metrics": {
"loss": 0.4883744527003505,
"ce_loss": 0.4783744622370937,
"lb_loss": 0.9999999895720363
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 184000000,
"cumulative_training_bytes": 184002846,
"metrics": {
"loss": 0.4883268971737586,
"ce_loss": 0.47832690671050176,
"lb_loss": 0.9999999894599509
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 185000000,
"cumulative_training_bytes": 185004724,
"metrics": {
"loss": 0.4882663181899502,
"ce_loss": 0.47826632772669336,
"lb_loss": 0.9999999894400365
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 186000000,
"cumulative_training_bytes": 186007260,
"metrics": {
"loss": 0.48828150444600626,
"ce_loss": 0.47828151398274943,
"lb_loss": 0.9999999894740508
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 187000000,
"cumulative_training_bytes": 187007019,
"metrics": {
"loss": 0.4882290801773191,
"ce_loss": 0.47822908971406225,
"lb_loss": 0.9999999894010861
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 188000000,
"cumulative_training_bytes": 188003736,
"metrics": {
"loss": 0.488216156216274,
"ce_loss": 0.4782161657530172,
"lb_loss": 0.9999999894326629
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 189000000,
"cumulative_training_bytes": 189007403,
"metrics": {
"loss": 0.4881525303701408,
"ce_loss": 0.47815253990688394,
"lb_loss": 0.9999999895538252
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 190000000,
"cumulative_training_bytes": 190003337,
"metrics": {
"loss": 0.4880743821461995,
"ce_loss": 0.4780743916829427,
"lb_loss": 0.9999999895905299
}
},
{
"epoch": 4,
"checkpoint_type": "epoch",
"metrics": {
"loss": 0.488021058104645,
"ce_loss": 0.4780210676413882,
"lb_loss": 0.9999999895423727,
"training_bytes": 47653398
},
"cumulative_training_bytes": 190613614,
"training_bytes_this_epoch": 47653398
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 191000000,
"cumulative_training_bytes": 191004295,
"metrics": {
"loss": 0.48361365467894313,
"ce_loss": 0.4736136642156863,
"lb_loss": 0.9999999906502518
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 192000000,
"cumulative_training_bytes": 192003486,
"metrics": {
"loss": 0.4822246106290027,
"ce_loss": 0.47222462016574585,
"lb_loss": 0.9999999911086994
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 193000000,
"cumulative_training_bytes": 193000756,
"metrics": {
"loss": 0.48206590686197065,
"ce_loss": 0.4720659163987138,
"lb_loss": 0.9999999875424376
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 194000000,
"cumulative_training_bytes": 194006438,
"metrics": {
"loss": 0.4826827534723066,
"ce_loss": 0.47268276300904977,
"lb_loss": 0.9999999888072726
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 195000000,
"cumulative_training_bytes": 195005382,
"metrics": {
"loss": 0.48297037944927085,
"ce_loss": 0.472970388986014,
"lb_loss": 0.9999999882249565
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 196000000,
"cumulative_training_bytes": 196002015,
"metrics": {
"loss": 0.4832766776071315,
"ce_loss": 0.47327668714387466,
"lb_loss": 0.999999988792289
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 197000000,
"cumulative_training_bytes": 197006361,
"metrics": {
"loss": 0.48392085377260935,
"ce_loss": 0.4739208633093525,
"lb_loss": 0.9999999878503721
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 198000000,
"cumulative_training_bytes": 198003880,
"metrics": {
"loss": 0.483928608201846,
"ce_loss": 0.4739286177385892,
"lb_loss": 0.99999998751023
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 199000000,
"cumulative_training_bytes": 199006196,
"metrics": {
"loss": 0.48404037288334817,
"ce_loss": 0.47404038242009133,
"lb_loss": 0.9999999879702041
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 200000000,
"cumulative_training_bytes": 200002073,
"metrics": {
"loss": 0.4839540720959099,
"ce_loss": 0.47395408163265307,
"lb_loss": 0.999999988760267
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 201000000,
"cumulative_training_bytes": 201002611,
"metrics": {
"loss": 0.4842147074617819,
"ce_loss": 0.4742147169985251,
"lb_loss": 0.9999999887032495
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 202000000,
"cumulative_training_bytes": 202000755,
"metrics": {
"loss": 0.48400739288586786,
"ce_loss": 0.474007402422611,
"lb_loss": 0.9999999884480903
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 203000000,
"cumulative_training_bytes": 203001562,
"metrics": {
"loss": 0.4841745324391381,
"ce_loss": 0.47417454197588127,
"lb_loss": 0.9999999887573181
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 204000000,
"cumulative_training_bytes": 204005682,
"metrics": {
"loss": 0.48423728844666647,
"ce_loss": 0.47423729798340963,
"lb_loss": 0.9999999889179008
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 205000000,
"cumulative_training_bytes": 205003502,
"metrics": {
"loss": 0.484211044443555,
"ce_loss": 0.47421105398029817,
"lb_loss": 0.9999999888598348
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 206000000,
"cumulative_training_bytes": 206008019,
"metrics": {
"loss": 0.48419132477137033,
"ce_loss": 0.4741913343081135,
"lb_loss": 0.9999999889632016
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 207000000,
"cumulative_training_bytes": 207007717,
"metrics": {
"loss": 0.4842689376011073,
"ce_loss": 0.47426894713785045,
"lb_loss": 0.9999999892488818
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 208000000,
"cumulative_training_bytes": 208005208,
"metrics": {
"loss": 0.4841757200888075,
"ce_loss": 0.47417572962555066,
"lb_loss": 0.999999989313176
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 209000000,
"cumulative_training_bytes": 209005329,
"metrics": {
"loss": 0.4841251532236735,
"ce_loss": 0.47412516276041666,
"lb_loss": 0.9999999895443519
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 210000000,
"cumulative_training_bytes": 210006121,
"metrics": {
"loss": 0.4841085443945093,
"ce_loss": 0.47410855393125245,
"lb_loss": 0.9999999895909708
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 211000000,
"cumulative_training_bytes": 211003532,
"metrics": {
"loss": 0.48414475511940114,
"ce_loss": 0.4741447646561443,
"lb_loss": 0.9999999894947094
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 212000000,
"cumulative_training_bytes": 212007723,
"metrics": {
"loss": 0.48415256366347176,
"ce_loss": 0.4741525732002149,
"lb_loss": 0.9999999894966027
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 213000000,
"cumulative_training_bytes": 213005205,
"metrics": {
"loss": 0.48423584617656507,
"ce_loss": 0.47423585571330823,
"lb_loss": 0.9999999894371515
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 214000000,
"cumulative_training_bytes": 214007542,
"metrics": {
"loss": 0.4842972747625365,
"ce_loss": 0.47429728429927964,
"lb_loss": 0.9999999894023176
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 215000000,
"cumulative_training_bytes": 215006636,
"metrics": {
"loss": 0.48420266889447544,
"ce_loss": 0.4742026784312186,
"lb_loss": 0.9999999897414117
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 216000000,
"cumulative_training_bytes": 216002411,
"metrics": {
"loss": 0.48442725453235763,
"ce_loss": 0.4744272640691008,
"lb_loss": 0.9999999896582165
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 217000000,
"cumulative_training_bytes": 217003351,
"metrics": {
"loss": 0.48465302021652246,
"ce_loss": 0.4746530297532656,
"lb_loss": 0.9999999896708351
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 218000000,
"cumulative_training_bytes": 218001934,
"metrics": {
"loss": 0.48466454465906106,
"ce_loss": 0.4746645541958042,
"lb_loss": 0.9999999894795718
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 219000000,
"cumulative_training_bytes": 219001498,
"metrics": {
"loss": 0.48470003685327034,
"ce_loss": 0.4747000463900135,
"lb_loss": 0.9999999894626067
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 220000000,
"cumulative_training_bytes": 220000802,
"metrics": {
"loss": 0.48471733482440416,
"ce_loss": 0.4747173443611473,
"lb_loss": 0.9999999896022145
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 221000000,
"cumulative_training_bytes": 221002631,
"metrics": {
"loss": 0.4847172157847394,
"ce_loss": 0.4747172253214826,
"lb_loss": 0.9999999895699386
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 222000000,
"cumulative_training_bytes": 222001546,
"metrics": {
"loss": 0.48476429971392804,
"ce_loss": 0.4747643092506712,
"lb_loss": 0.9999999897434038
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 223000000,
"cumulative_training_bytes": 223002095,
"metrics": {
"loss": 0.4846939223298289,
"ce_loss": 0.47469393186657205,
"lb_loss": 0.9999999896217131
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 224000000,
"cumulative_training_bytes": 224004704,
"metrics": {
"loss": 0.4846698684175937,
"ce_loss": 0.47466987795433685,
"lb_loss": 0.9999999897695562
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 225000000,
"cumulative_training_bytes": 225002022,
"metrics": {
"loss": 0.48464635646704474,
"ce_loss": 0.4746463660037879,
"lb_loss": 0.9999999898666792
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 226000000,
"cumulative_training_bytes": 226003031,
"metrics": {
"loss": 0.4846968945550516,
"ce_loss": 0.47469690409179477,
"lb_loss": 0.999999989960508
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 227000000,
"cumulative_training_bytes": 227007213,
"metrics": {
"loss": 0.48466569770009893,
"ce_loss": 0.4746657072368421,
"lb_loss": 0.9999999899738713
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 228000000,
"cumulative_training_bytes": 228002618,
"metrics": {
"loss": 0.4847170054192085,
"ce_loss": 0.47471701495595164,
"lb_loss": 0.9999999898766133
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 229000000,
"cumulative_training_bytes": 229002817,
"metrics": {
"loss": 0.4849277419846056,
"ce_loss": 0.47492775152134875,
"lb_loss": 0.9999999898439013
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 230000000,
"cumulative_training_bytes": 230004657,
"metrics": {
"loss": 0.48519230282958,
"ce_loss": 0.47519231236632314,
"lb_loss": 0.9999999897664978
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 231000000,
"cumulative_training_bytes": 231006924,
"metrics": {
"loss": 0.4853118831206326,
"ce_loss": 0.4753118926573758,
"lb_loss": 0.9999999898737654
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 232000000,
"cumulative_training_bytes": 232007018,
"metrics": {
"loss": 0.4854375916427203,
"ce_loss": 0.4754376011794635,
"lb_loss": 0.9999999898104178
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 233000000,
"cumulative_training_bytes": 233006236,
"metrics": {
"loss": 0.48551042782778,
"ce_loss": 0.47551043736452314,
"lb_loss": 0.9999999898362022
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 234000000,
"cumulative_training_bytes": 234000486,
"metrics": {
"loss": 0.4855104365451488,
"ce_loss": 0.47551044608189197,
"lb_loss": 0.9999999898169262
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 235000000,
"cumulative_training_bytes": 235002824,
"metrics": {
"loss": 0.485555099787045,
"ce_loss": 0.47555510932378814,
"lb_loss": 0.9999999898311206
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 236000000,
"cumulative_training_bytes": 236004788,
"metrics": {
"loss": 0.48557505530384387,
"ce_loss": 0.47557506484058704,
"lb_loss": 0.9999999897843591
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 237000000,
"cumulative_training_bytes": 237001532,
"metrics": {
"loss": 0.4854937203452311,
"ce_loss": 0.47549372988197425,
"lb_loss": 0.999999989826477
}
},
{
"checkpoint_type": "bytes",
"bytes_threshold": 238000000,
"cumulative_training_bytes": 238004993,
"metrics": {
"loss": 0.485468689970447,
"ce_loss": 0.4754686995071902,
"lb_loss": 0.9999999898203087
}
},
{
"epoch": 5,
"checkpoint_type": "epoch",
"metrics": {
"loss": 0.48547107517566046,
"ce_loss": 0.4754710847124036,
"lb_loss": 0.9999999897626342,
"training_bytes": 47653400
},
"cumulative_training_bytes": 238267014,
"training_bytes_this_epoch": 47653400
}
]
}