{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.973821989528796,
  "eval_steps": 500,
  "global_step": 285,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.10471204188481675,
      "grad_norm": 1.821414025646637,
      "learning_rate": 1.5517241379310346e-05,
      "loss": 0.4862,
      "step": 10
    },
    {
      "epoch": 0.2094240837696335,
      "grad_norm": 0.3942393541183772,
      "learning_rate": 3.275862068965517e-05,
      "loss": 0.3896,
      "step": 20
    },
    {
      "epoch": 0.31413612565445026,
      "grad_norm": 0.8255719356243104,
      "learning_rate": 5e-05,
      "loss": 0.3473,
      "step": 30
    },
    {
      "epoch": 0.418848167539267,
      "grad_norm": 0.37799831218550856,
      "learning_rate": 4.981198836496775e-05,
      "loss": 0.3207,
      "step": 40
    },
    {
      "epoch": 0.5235602094240838,
      "grad_norm": 0.2838496737375545,
      "learning_rate": 4.9250781329863606e-05,
      "loss": 0.3069,
      "step": 50
    },
    {
      "epoch": 0.6282722513089005,
      "grad_norm": 0.35016464634119265,
      "learning_rate": 4.8324819970868473e-05,
      "loss": 0.2959,
      "step": 60
    },
    {
      "epoch": 0.7329842931937173,
      "grad_norm": 0.265970946973345,
      "learning_rate": 4.7048031608708876e-05,
      "loss": 0.2888,
      "step": 70
    },
    {
      "epoch": 0.837696335078534,
      "grad_norm": 0.3283794303490618,
      "learning_rate": 4.5439620328789593e-05,
      "loss": 0.2812,
      "step": 80
    },
    {
      "epoch": 0.9424083769633508,
      "grad_norm": 0.2933681591769702,
      "learning_rate": 4.352377813387398e-05,
      "loss": 0.2821,
      "step": 90
    },
    {
      "epoch": 1.0418848167539267,
      "grad_norm": 0.784193010247334,
      "learning_rate": 4.1329321073844415e-05,
      "loss": 0.2646,
      "step": 100
    },
    {
      "epoch": 1.1465968586387434,
      "grad_norm": 0.43774945809310173,
      "learning_rate": 3.888925582549006e-05,
      "loss": 0.2466,
      "step": 110
    },
    {
      "epoch": 1.2513089005235603,
      "grad_norm": 0.24107111209505053,
      "learning_rate": 3.624028324136517e-05,
      "loss": 0.2429,
      "step": 120
    },
    {
      "epoch": 1.356020942408377,
      "grad_norm": 0.20357460785884127,
      "learning_rate": 3.34222463348055e-05,
      "loss": 0.2408,
      "step": 130
    },
    {
      "epoch": 1.4607329842931938,
      "grad_norm": 0.22761632175708865,
      "learning_rate": 3.0477531003921745e-05,
      "loss": 0.2407,
      "step": 140
    },
    {
      "epoch": 1.5654450261780104,
      "grad_norm": 0.201935897320554,
      "learning_rate": 2.7450428508239024e-05,
      "loss": 0.237,
      "step": 150
    },
    {
      "epoch": 1.6701570680628273,
      "grad_norm": 0.17116285919322816,
      "learning_rate": 2.4386469286927196e-05,
      "loss": 0.2366,
      "step": 160
    },
    {
      "epoch": 1.7748691099476441,
      "grad_norm": 0.16436756820682696,
      "learning_rate": 2.1331738138615958e-05,
      "loss": 0.2357,
      "step": 170
    },
    {
      "epoch": 1.8795811518324608,
      "grad_norm": 0.16052539184685044,
      "learning_rate": 1.8332181063127545e-05,
      "loss": 0.2338,
      "step": 180
    },
    {
      "epoch": 1.9842931937172774,
      "grad_norm": 0.1441672107096996,
      "learning_rate": 1.5432914190872757e-05,
      "loss": 0.235,
      "step": 190
    },
    {
      "epoch": 2.0837696335078535,
      "grad_norm": 0.18363079335917978,
      "learning_rate": 1.2677545194255402e-05,
      "loss": 0.207,
      "step": 200
    },
    {
      "epoch": 2.18848167539267,
      "grad_norm": 0.15661072667443002,
      "learning_rate": 1.0107517387689166e-05,
      "loss": 0.2003,
      "step": 210
    },
    {
      "epoch": 2.2931937172774868,
      "grad_norm": 0.13738033977955236,
      "learning_rate": 7.761486381573327e-06,
      "loss": 0.2002,
      "step": 220
    },
    {
      "epoch": 2.3979057591623034,
      "grad_norm": 0.11864758554083908,
      "learning_rate": 5.674738665931575e-06,
      "loss": 0.2016,
      "step": 230
    },
    {
      "epoch": 2.5026178010471205,
      "grad_norm": 0.11174553605583903,
      "learning_rate": 3.878660868757323e-06,
      "loss": 0.2018,
      "step": 240
    },
    {
      "epoch": 2.607329842931937,
      "grad_norm": 0.10468557565480724,
      "learning_rate": 2.4002676719139166e-06,
      "loss": 0.1992,
      "step": 250
    },
    {
      "epoch": 2.712041884816754,
      "grad_norm": 0.11078267776032583,
      "learning_rate": 1.2617954851740832e-06,
      "loss": 0.1994,
      "step": 260
    },
    {
      "epoch": 2.816753926701571,
      "grad_norm": 0.09962367034420198,
      "learning_rate": 4.803679899192392e-07,
      "loss": 0.2002,
      "step": 270
    },
    {
      "epoch": 2.9214659685863875,
      "grad_norm": 0.10060873138847887,
      "learning_rate": 6.773858303274483e-08,
      "loss": 0.1993,
      "step": 280
    },
    {
      "epoch": 2.973821989528796,
      "step": 285,
      "total_flos": 2039941919932416.0,
      "train_loss": 0.25715315007326894,
      "train_runtime": 42232.1226,
      "train_samples_per_second": 3.469,
      "train_steps_per_second": 0.007
    }
  ],
  "logging_steps": 10,
  "max_steps": 285,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 50,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2039941919932416.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}