G-MATRIX-Embedding-v1 / trainer_state.json
mssongit's picture
Upload GTE Embedding Model
a3552d8 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 100,
"global_step": 2700,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.037037037037037035,
"grad_norm": 4.8827619552612305,
"learning_rate": 3.7037037037037037e-06,
"loss": 0.2228,
"step": 100
},
{
"epoch": 0.037037037037037035,
"eval_all-nli-dev_cosine_accuracy": 0.9889583333333334,
"eval_loss": 0.10640299320220947,
"eval_runtime": 97.5038,
"eval_samples_per_second": 147.687,
"eval_steps_per_second": 9.23,
"step": 100
},
{
"epoch": 0.07407407407407407,
"grad_norm": 2.8969228267669678,
"learning_rate": 7.4074074074074075e-06,
"loss": 0.1292,
"step": 200
},
{
"epoch": 0.07407407407407407,
"eval_all-nli-dev_cosine_accuracy": 0.9938194444444445,
"eval_loss": 0.05102457106113434,
"eval_runtime": 99.0099,
"eval_samples_per_second": 145.44,
"eval_steps_per_second": 9.09,
"step": 200
},
{
"epoch": 0.1111111111111111,
"grad_norm": 1.966164469718933,
"learning_rate": 9.876543209876543e-06,
"loss": 0.0785,
"step": 300
},
{
"epoch": 0.1111111111111111,
"eval_all-nli-dev_cosine_accuracy": 0.9944444444444445,
"eval_loss": 0.040028076618909836,
"eval_runtime": 98.8666,
"eval_samples_per_second": 145.651,
"eval_steps_per_second": 9.103,
"step": 300
},
{
"epoch": 0.14814814814814814,
"grad_norm": 1.080277919769287,
"learning_rate": 9.465020576131688e-06,
"loss": 0.0675,
"step": 400
},
{
"epoch": 0.14814814814814814,
"eval_all-nli-dev_cosine_accuracy": 0.9954861111111111,
"eval_loss": 0.03450320288538933,
"eval_runtime": 99.5727,
"eval_samples_per_second": 144.618,
"eval_steps_per_second": 9.039,
"step": 400
},
{
"epoch": 0.18518518518518517,
"grad_norm": 1.5480653047561646,
"learning_rate": 9.053497942386832e-06,
"loss": 0.0667,
"step": 500
},
{
"epoch": 0.18518518518518517,
"eval_all-nli-dev_cosine_accuracy": 0.9952777777777778,
"eval_loss": 0.031959593296051025,
"eval_runtime": 97.8179,
"eval_samples_per_second": 147.212,
"eval_steps_per_second": 9.201,
"step": 500
},
{
"epoch": 0.2222222222222222,
"grad_norm": 1.9074684381484985,
"learning_rate": 8.641975308641975e-06,
"loss": 0.0644,
"step": 600
},
{
"epoch": 0.2222222222222222,
"eval_all-nli-dev_cosine_accuracy": 0.995625,
"eval_loss": 0.030600089579820633,
"eval_runtime": 98.4261,
"eval_samples_per_second": 146.303,
"eval_steps_per_second": 9.144,
"step": 600
},
{
"epoch": 0.25925925925925924,
"grad_norm": 3.8694491386413574,
"learning_rate": 8.23045267489712e-06,
"loss": 0.067,
"step": 700
},
{
"epoch": 0.25925925925925924,
"eval_all-nli-dev_cosine_accuracy": 0.9959027777777778,
"eval_loss": 0.030407674610614777,
"eval_runtime": 98.4984,
"eval_samples_per_second": 146.195,
"eval_steps_per_second": 9.137,
"step": 700
},
{
"epoch": 0.2962962962962963,
"grad_norm": 1.7101613283157349,
"learning_rate": 7.818930041152263e-06,
"loss": 0.0568,
"step": 800
},
{
"epoch": 0.2962962962962963,
"eval_all-nli-dev_cosine_accuracy": 0.995625,
"eval_loss": 0.02961079403758049,
"eval_runtime": 98.1294,
"eval_samples_per_second": 146.745,
"eval_steps_per_second": 9.172,
"step": 800
},
{
"epoch": 0.3333333333333333,
"grad_norm": 2.4039230346679688,
"learning_rate": 7.4074074074074075e-06,
"loss": 0.0617,
"step": 900
},
{
"epoch": 0.3333333333333333,
"eval_all-nli-dev_cosine_accuracy": 0.9957638888888889,
"eval_loss": 0.0286862775683403,
"eval_runtime": 97.8253,
"eval_samples_per_second": 147.201,
"eval_steps_per_second": 9.2,
"step": 900
},
{
"epoch": 0.37037037037037035,
"grad_norm": 1.3637861013412476,
"learning_rate": 6.9958847736625525e-06,
"loss": 0.0556,
"step": 1000
},
{
"epoch": 0.37037037037037035,
"eval_all-nli-dev_cosine_accuracy": 0.99625,
"eval_loss": 0.027397217229008675,
"eval_runtime": 98.5282,
"eval_samples_per_second": 146.151,
"eval_steps_per_second": 9.134,
"step": 1000
},
{
"epoch": 0.4074074074074074,
"grad_norm": 2.049680709838867,
"learning_rate": 6.584362139917696e-06,
"loss": 0.0532,
"step": 1100
},
{
"epoch": 0.4074074074074074,
"eval_all-nli-dev_cosine_accuracy": 0.99625,
"eval_loss": 0.027111150324344635,
"eval_runtime": 99.6305,
"eval_samples_per_second": 144.534,
"eval_steps_per_second": 9.033,
"step": 1100
},
{
"epoch": 0.4444444444444444,
"grad_norm": 1.6650844812393188,
"learning_rate": 6.17283950617284e-06,
"loss": 0.0524,
"step": 1200
},
{
"epoch": 0.4444444444444444,
"eval_all-nli-dev_cosine_accuracy": 0.9965972222222222,
"eval_loss": 0.026169853284955025,
"eval_runtime": 99.4848,
"eval_samples_per_second": 144.746,
"eval_steps_per_second": 9.047,
"step": 1200
},
{
"epoch": 0.48148148148148145,
"grad_norm": 2.308643341064453,
"learning_rate": 5.761316872427984e-06,
"loss": 0.0529,
"step": 1300
},
{
"epoch": 0.48148148148148145,
"eval_all-nli-dev_cosine_accuracy": 0.9961805555555555,
"eval_loss": 0.026670673862099648,
"eval_runtime": 105.3249,
"eval_samples_per_second": 136.72,
"eval_steps_per_second": 8.545,
"step": 1300
},
{
"epoch": 0.5185185185185185,
"grad_norm": 1.1921712160110474,
"learning_rate": 5.349794238683128e-06,
"loss": 0.0527,
"step": 1400
},
{
"epoch": 0.5185185185185185,
"eval_all-nli-dev_cosine_accuracy": 0.9961805555555555,
"eval_loss": 0.025993267074227333,
"eval_runtime": 101.3038,
"eval_samples_per_second": 142.147,
"eval_steps_per_second": 8.884,
"step": 1400
},
{
"epoch": 0.5555555555555556,
"grad_norm": 2.8418076038360596,
"learning_rate": 4.938271604938272e-06,
"loss": 0.0479,
"step": 1500
},
{
"epoch": 0.5555555555555556,
"eval_all-nli-dev_cosine_accuracy": 0.99625,
"eval_loss": 0.025305895134806633,
"eval_runtime": 101.7867,
"eval_samples_per_second": 141.472,
"eval_steps_per_second": 8.842,
"step": 1500
},
{
"epoch": 0.5925925925925926,
"grad_norm": 3.0896897315979004,
"learning_rate": 4.526748971193416e-06,
"loss": 0.0515,
"step": 1600
},
{
"epoch": 0.5925925925925926,
"eval_all-nli-dev_cosine_accuracy": 0.9966666666666667,
"eval_loss": 0.024532195180654526,
"eval_runtime": 101.8042,
"eval_samples_per_second": 141.448,
"eval_steps_per_second": 8.84,
"step": 1600
},
{
"epoch": 0.6296296296296297,
"grad_norm": 2.7592620849609375,
"learning_rate": 4.11522633744856e-06,
"loss": 0.0512,
"step": 1700
},
{
"epoch": 0.6296296296296297,
"eval_all-nli-dev_cosine_accuracy": 0.9961805555555555,
"eval_loss": 0.025122441351413727,
"eval_runtime": 102.3766,
"eval_samples_per_second": 140.657,
"eval_steps_per_second": 8.791,
"step": 1700
},
{
"epoch": 0.6666666666666666,
"grad_norm": 0.41445350646972656,
"learning_rate": 3.7037037037037037e-06,
"loss": 0.0548,
"step": 1800
},
{
"epoch": 0.6666666666666666,
"eval_all-nli-dev_cosine_accuracy": 0.9963194444444444,
"eval_loss": 0.024524033069610596,
"eval_runtime": 97.2602,
"eval_samples_per_second": 148.056,
"eval_steps_per_second": 9.254,
"step": 1800
},
{
"epoch": 0.7037037037037037,
"grad_norm": 1.6982859373092651,
"learning_rate": 3.292181069958848e-06,
"loss": 0.0476,
"step": 1900
},
{
"epoch": 0.7037037037037037,
"eval_all-nli-dev_cosine_accuracy": 0.9964583333333333,
"eval_loss": 0.024558432400226593,
"eval_runtime": 99.9108,
"eval_samples_per_second": 144.129,
"eval_steps_per_second": 9.008,
"step": 1900
},
{
"epoch": 0.7407407407407407,
"grad_norm": 1.9297990798950195,
"learning_rate": 2.880658436213992e-06,
"loss": 0.0456,
"step": 2000
},
{
"epoch": 0.7407407407407407,
"eval_all-nli-dev_cosine_accuracy": 0.9961111111111111,
"eval_loss": 0.024668598547577858,
"eval_runtime": 106.8633,
"eval_samples_per_second": 134.752,
"eval_steps_per_second": 8.422,
"step": 2000
},
{
"epoch": 0.7777777777777778,
"grad_norm": 1.5807716846466064,
"learning_rate": 2.469135802469136e-06,
"loss": 0.0548,
"step": 2100
},
{
"epoch": 0.7777777777777778,
"eval_all-nli-dev_cosine_accuracy": 0.9964583333333333,
"eval_loss": 0.024200452491641045,
"eval_runtime": 101.8908,
"eval_samples_per_second": 141.328,
"eval_steps_per_second": 8.833,
"step": 2100
},
{
"epoch": 0.8148148148148148,
"grad_norm": 4.243816375732422,
"learning_rate": 2.05761316872428e-06,
"loss": 0.051,
"step": 2200
},
{
"epoch": 0.8148148148148148,
"eval_all-nli-dev_cosine_accuracy": 0.9964583333333333,
"eval_loss": 0.024141203612089157,
"eval_runtime": 101.3185,
"eval_samples_per_second": 142.126,
"eval_steps_per_second": 8.883,
"step": 2200
},
{
"epoch": 0.8518518518518519,
"grad_norm": 1.1512444019317627,
"learning_rate": 1.646090534979424e-06,
"loss": 0.0472,
"step": 2300
},
{
"epoch": 0.8518518518518519,
"eval_all-nli-dev_cosine_accuracy": 0.9967361111111112,
"eval_loss": 0.02424301952123642,
"eval_runtime": 100.0984,
"eval_samples_per_second": 143.858,
"eval_steps_per_second": 8.991,
"step": 2300
},
{
"epoch": 0.8888888888888888,
"grad_norm": 0.8177826404571533,
"learning_rate": 1.234567901234568e-06,
"loss": 0.0492,
"step": 2400
},
{
"epoch": 0.8888888888888888,
"eval_all-nli-dev_cosine_accuracy": 0.9967361111111112,
"eval_loss": 0.024101639166474342,
"eval_runtime": 100.6902,
"eval_samples_per_second": 143.013,
"eval_steps_per_second": 8.938,
"step": 2400
},
{
"epoch": 0.9259259259259259,
"grad_norm": 0.5140101909637451,
"learning_rate": 8.23045267489712e-07,
"loss": 0.0463,
"step": 2500
},
{
"epoch": 0.9259259259259259,
"eval_all-nli-dev_cosine_accuracy": 0.9967361111111112,
"eval_loss": 0.02386292815208435,
"eval_runtime": 101.9918,
"eval_samples_per_second": 141.188,
"eval_steps_per_second": 8.824,
"step": 2500
},
{
"epoch": 0.9629629629629629,
"grad_norm": 3.3629631996154785,
"learning_rate": 4.11522633744856e-07,
"loss": 0.0484,
"step": 2600
},
{
"epoch": 0.9629629629629629,
"eval_all-nli-dev_cosine_accuracy": 0.9966666666666667,
"eval_loss": 0.02382882498204708,
"eval_runtime": 100.8961,
"eval_samples_per_second": 142.721,
"eval_steps_per_second": 8.92,
"step": 2600
},
{
"epoch": 1.0,
"grad_norm": 2.4896204471588135,
"learning_rate": 0.0,
"loss": 0.0498,
"step": 2700
},
{
"epoch": 1.0,
"eval_all-nli-dev_cosine_accuracy": 0.9967361111111112,
"eval_loss": 0.023831075057387352,
"eval_runtime": 100.2374,
"eval_samples_per_second": 143.659,
"eval_steps_per_second": 8.979,
"step": 2700
}
],
"logging_steps": 100,
"max_steps": 2700,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}