rick-qwen3-4b-lora / checkpoint-285 /trainer_state.json
Aananda-giri's picture
Upload Rick dialogue fine-tuned model
6435163 verified
{
"best_global_step": 190,
"best_metric": 2.3478477001190186,
"best_model_checkpoint": "rick-qwen-finetuned/checkpoint-190",
"epoch": 3.0,
"eval_steps": 500,
"global_step": 285,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"entropy": 2.1721158534288407,
"epoch": 0.10610079575596817,
"grad_norm": 4.033281326293945,
"learning_rate": 0.00015517241379310346,
"loss": 4.8926,
"mean_token_accuracy": 0.37371344938874246,
"num_tokens": 2064.0,
"step": 10
},
{
"entropy": 2.7347501456737517,
"epoch": 0.21220159151193635,
"grad_norm": 7.625450134277344,
"learning_rate": 0.0003275862068965517,
"loss": 2.7008,
"mean_token_accuracy": 0.5224792890250682,
"num_tokens": 4434.0,
"step": 20
},
{
"entropy": 2.2419000223278998,
"epoch": 0.3183023872679045,
"grad_norm": 1.8114349842071533,
"learning_rate": 0.0005,
"loss": 2.4624,
"mean_token_accuracy": 0.5613403469324112,
"num_tokens": 6665.0,
"step": 30
},
{
"entropy": 2.4120024889707565,
"epoch": 0.4244031830238727,
"grad_norm": 1.839812159538269,
"learning_rate": 0.0004981198836496775,
"loss": 2.5023,
"mean_token_accuracy": 0.5351990483701229,
"num_tokens": 8954.0,
"step": 40
},
{
"entropy": 2.2711548835039137,
"epoch": 0.5305039787798409,
"grad_norm": 2.454310417175293,
"learning_rate": 0.000492507813298636,
"loss": 2.5445,
"mean_token_accuracy": 0.561315081268549,
"num_tokens": 10917.0,
"step": 50
},
{
"entropy": 2.3858062833547593,
"epoch": 0.636604774535809,
"grad_norm": 12.571678161621094,
"learning_rate": 0.00048324819970868473,
"loss": 2.4936,
"mean_token_accuracy": 0.5213245801627636,
"num_tokens": 13163.0,
"step": 60
},
{
"entropy": 2.3039768010377886,
"epoch": 0.7427055702917772,
"grad_norm": 1.9571681022644043,
"learning_rate": 0.00047048031608708875,
"loss": 2.4157,
"mean_token_accuracy": 0.5572853125631809,
"num_tokens": 15404.0,
"step": 70
},
{
"entropy": 2.301254630088806,
"epoch": 0.8488063660477454,
"grad_norm": 1.962308406829834,
"learning_rate": 0.00045439620328789593,
"loss": 2.4493,
"mean_token_accuracy": 0.5377297826111317,
"num_tokens": 17936.0,
"step": 80
},
{
"entropy": 2.3037476420402525,
"epoch": 0.9549071618037135,
"grad_norm": 2.418731451034546,
"learning_rate": 0.0004352377813387398,
"loss": 2.4503,
"mean_token_accuracy": 0.5464393310248852,
"num_tokens": 20214.0,
"step": 90
},
{
"epoch": 1.0,
"eval_entropy": 2.167915307340168,
"eval_loss": 2.4205760955810547,
"eval_mean_token_accuracy": 0.5355801603623799,
"eval_num_tokens": 21095.0,
"eval_runtime": 18.0724,
"eval_samples_per_second": 2.324,
"eval_steps_per_second": 2.324,
"step": 95
},
{
"entropy": 2.109248090434719,
"epoch": 1.0530503978779842,
"grad_norm": 1.6356357336044312,
"learning_rate": 0.0004132932107384442,
"loss": 2.0461,
"mean_token_accuracy": 0.5923033171408886,
"num_tokens": 22568.0,
"step": 100
},
{
"entropy": 1.6169874876737595,
"epoch": 1.1591511936339522,
"grad_norm": 3.5763280391693115,
"learning_rate": 0.00038889255825490053,
"loss": 1.6699,
"mean_token_accuracy": 0.6313242256641388,
"num_tokens": 24985.0,
"step": 110
},
{
"entropy": 1.4064562141895294,
"epoch": 1.2652519893899203,
"grad_norm": 2.4848248958587646,
"learning_rate": 0.0003624028324136517,
"loss": 1.3988,
"mean_token_accuracy": 0.6858954817056656,
"num_tokens": 27037.0,
"step": 120
},
{
"entropy": 1.5654033362865447,
"epoch": 1.3713527851458887,
"grad_norm": 2.5782299041748047,
"learning_rate": 0.00033422246334805503,
"loss": 1.6258,
"mean_token_accuracy": 0.6557254463434219,
"num_tokens": 29468.0,
"step": 130
},
{
"entropy": 1.4926183179020882,
"epoch": 1.4774535809018567,
"grad_norm": 2.629681348800659,
"learning_rate": 0.0003047753100392174,
"loss": 1.5027,
"mean_token_accuracy": 0.6912050604820251,
"num_tokens": 31213.0,
"step": 140
},
{
"entropy": 1.383179245889187,
"epoch": 1.5835543766578248,
"grad_norm": 2.4897210597991943,
"learning_rate": 0.0002745042850823902,
"loss": 1.5551,
"mean_token_accuracy": 0.6832514323294163,
"num_tokens": 33509.0,
"step": 150
},
{
"entropy": 1.6858038201928138,
"epoch": 1.6896551724137931,
"grad_norm": 2.9458775520324707,
"learning_rate": 0.00024386469286927196,
"loss": 1.6896,
"mean_token_accuracy": 0.651208619773388,
"num_tokens": 35740.0,
"step": 160
},
{
"entropy": 1.3946410089731216,
"epoch": 1.7957559681697612,
"grad_norm": 2.711728572845459,
"learning_rate": 0.00021331738138615958,
"loss": 1.3121,
"mean_token_accuracy": 0.6985804051160812,
"num_tokens": 37788.0,
"step": 170
},
{
"entropy": 1.492286352813244,
"epoch": 1.9018567639257293,
"grad_norm": 2.2474756240844727,
"learning_rate": 0.00018332181063127542,
"loss": 1.5417,
"mean_token_accuracy": 0.6719372659921646,
"num_tokens": 40106.0,
"step": 180
},
{
"entropy": 1.45025778461147,
"epoch": 2.0,
"grad_norm": 4.529512405395508,
"learning_rate": 0.00015432914190872756,
"loss": 1.5463,
"mean_token_accuracy": 0.6596984573312707,
"num_tokens": 42190.0,
"step": 190
},
{
"epoch": 2.0,
"eval_entropy": 1.8587926966803414,
"eval_loss": 2.3478477001190186,
"eval_mean_token_accuracy": 0.5531174611477625,
"eval_num_tokens": 42190.0,
"eval_runtime": 18.1194,
"eval_samples_per_second": 2.318,
"eval_steps_per_second": 2.318,
"step": 190
},
{
"entropy": 0.9160950664430857,
"epoch": 2.1061007957559683,
"grad_norm": 2.8789069652557373,
"learning_rate": 0.00012677545194255402,
"loss": 0.7246,
"mean_token_accuracy": 0.8295891240239144,
"num_tokens": 44864.0,
"step": 200
},
{
"entropy": 0.7227096475660801,
"epoch": 2.212201591511936,
"grad_norm": 2.2743287086486816,
"learning_rate": 0.00010107517387689166,
"loss": 0.7039,
"mean_token_accuracy": 0.8215794518589974,
"num_tokens": 47199.0,
"step": 210
},
{
"entropy": 0.6997399874031544,
"epoch": 2.3183023872679045,
"grad_norm": 2.3004889488220215,
"learning_rate": 7.761486381573326e-05,
"loss": 0.6166,
"mean_token_accuracy": 0.8439405784010887,
"num_tokens": 49392.0,
"step": 220
},
{
"entropy": 0.6859173461794853,
"epoch": 2.424403183023873,
"grad_norm": 1.3413803577423096,
"learning_rate": 5.6747386659315755e-05,
"loss": 0.6388,
"mean_token_accuracy": 0.8292661786079407,
"num_tokens": 51499.0,
"step": 230
},
{
"entropy": 0.6497876241803169,
"epoch": 2.5305039787798407,
"grad_norm": 3.4095118045806885,
"learning_rate": 3.878660868757322e-05,
"loss": 0.7073,
"mean_token_accuracy": 0.827797320485115,
"num_tokens": 53730.0,
"step": 240
},
{
"entropy": 0.6810318753123283,
"epoch": 2.636604774535809,
"grad_norm": 1.5059446096420288,
"learning_rate": 2.4002676719139166e-05,
"loss": 0.6202,
"mean_token_accuracy": 0.8435622423887252,
"num_tokens": 55788.0,
"step": 250
},
{
"entropy": 0.6908333510160446,
"epoch": 2.7427055702917773,
"grad_norm": 2.3477425575256348,
"learning_rate": 1.2617954851740832e-05,
"loss": 0.5653,
"mean_token_accuracy": 0.8467133089900016,
"num_tokens": 57735.0,
"step": 260
},
{
"entropy": 0.6482014119625091,
"epoch": 2.8488063660477456,
"grad_norm": 1.8979235887527466,
"learning_rate": 4.803679899192393e-06,
"loss": 0.6111,
"mean_token_accuracy": 0.8416899383068085,
"num_tokens": 60064.0,
"step": 270
},
{
"entropy": 0.6196089655160903,
"epoch": 2.9549071618037135,
"grad_norm": 2.2915163040161133,
"learning_rate": 6.773858303274482e-07,
"loss": 0.6357,
"mean_token_accuracy": 0.8439681366086006,
"num_tokens": 62293.0,
"step": 280
},
{
"epoch": 3.0,
"eval_entropy": 1.2214261406943911,
"eval_loss": 2.704627752304077,
"eval_mean_token_accuracy": 0.554165651400884,
"eval_num_tokens": 63285.0,
"eval_runtime": 18.1229,
"eval_samples_per_second": 2.318,
"eval_steps_per_second": 2.318,
"step": 285
}
],
"logging_steps": 10,
"max_steps": 285,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1429848321131520.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}