{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0025662959794697,
  "eval_steps": 147,
  "global_step": 293,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.003421727972626176,
      "eval_loss": 4.355165481567383,
      "eval_runtime": 52.6403,
      "eval_samples_per_second": 18.712,
      "eval_steps_per_second": 1.178,
      "step": 1
    },
    {
      "epoch": 0.03421727972626176,
      "grad_norm": 4.47415018081665,
      "learning_rate": 2e-05,
      "loss": 4.2505,
      "step": 10
    },
    {
      "epoch": 0.06843455945252352,
      "grad_norm": 1.6644771099090576,
      "learning_rate": 4e-05,
      "loss": 3.3993,
      "step": 20
    },
    {
      "epoch": 0.10265183917878529,
      "grad_norm": 1.3752275705337524,
      "learning_rate": 6e-05,
      "loss": 2.8647,
      "step": 30
    },
    {
      "epoch": 0.13686911890504705,
      "grad_norm": 1.3559530973434448,
      "learning_rate": 8e-05,
      "loss": 2.6811,
      "step": 40
    },
    {
      "epoch": 0.1710863986313088,
      "grad_norm": 1.0810866355895996,
      "learning_rate": 0.0001,
      "loss": 2.5526,
      "step": 50
    },
    {
      "epoch": 0.20530367835757057,
      "grad_norm": 1.128179669380188,
      "learning_rate": 0.00012,
      "loss": 2.5003,
      "step": 60
    },
    {
      "epoch": 0.23952095808383234,
      "grad_norm": 1.2589479684829712,
      "learning_rate": 0.00014,
      "loss": 2.421,
      "step": 70
    },
    {
      "epoch": 0.2737382378100941,
      "grad_norm": 1.1778737306594849,
      "learning_rate": 0.00016,
      "loss": 2.3997,
      "step": 80
    },
    {
      "epoch": 0.30795551753635586,
      "grad_norm": 1.136731743812561,
      "learning_rate": 0.00018,
      "loss": 2.3794,
      "step": 90
    },
    {
      "epoch": 0.3421727972626176,
      "grad_norm": 1.1050846576690674,
      "learning_rate": 0.0002,
      "loss": 2.339,
      "step": 100
    },
    {
      "epoch": 0.3763900769888794,
      "grad_norm": 1.1404505968093872,
      "learning_rate": 0.0002,
      "loss": 2.3492,
      "step": 110
    },
    {
      "epoch": 0.41060735671514115,
      "grad_norm": 1.0500866174697876,
      "learning_rate": 0.0002,
      "loss": 2.2997,
      "step": 120
    },
    {
      "epoch": 0.4448246364414029,
      "grad_norm": 1.2117984294891357,
      "learning_rate": 0.0002,
      "loss": 2.325,
      "step": 130
    },
    {
      "epoch": 0.47904191616766467,
      "grad_norm": 1.127273440361023,
      "learning_rate": 0.0002,
      "loss": 2.2696,
      "step": 140
    },
    {
      "epoch": 0.5029940119760479,
      "eval_loss": 2.2770884037017822,
      "eval_runtime": 53.1905,
      "eval_samples_per_second": 18.518,
      "eval_steps_per_second": 1.166,
      "step": 147
    },
    {
      "epoch": 0.5132591958939264,
      "grad_norm": 1.189348578453064,
      "learning_rate": 0.0002,
      "loss": 2.3341,
      "step": 150
    },
    {
      "epoch": 0.5474764756201882,
      "grad_norm": 1.0971434116363525,
      "learning_rate": 0.0002,
      "loss": 2.2627,
      "step": 160
    },
    {
      "epoch": 0.58169375534645,
      "grad_norm": 1.0656123161315918,
      "learning_rate": 0.0002,
      "loss": 2.2739,
      "step": 170
    },
    {
      "epoch": 0.6159110350727117,
      "grad_norm": 1.1617597341537476,
      "learning_rate": 0.0002,
      "loss": 2.2446,
      "step": 180
    },
    {
      "epoch": 0.6501283147989735,
      "grad_norm": 1.1609177589416504,
      "learning_rate": 0.0002,
      "loss": 2.2269,
      "step": 190
    },
    {
      "epoch": 0.6843455945252352,
      "grad_norm": 1.0725802183151245,
      "learning_rate": 0.0002,
      "loss": 2.2471,
      "step": 200
    },
    {
      "epoch": 0.718562874251497,
      "grad_norm": 1.1435920000076294,
      "learning_rate": 0.0002,
      "loss": 2.2892,
      "step": 210
    },
    {
      "epoch": 0.7527801539777588,
      "grad_norm": 1.1242313385009766,
      "learning_rate": 0.0002,
      "loss": 2.2795,
      "step": 220
    },
    {
      "epoch": 0.7869974337040205,
      "grad_norm": 1.0565266609191895,
      "learning_rate": 0.0002,
      "loss": 2.2515,
      "step": 230
    },
    {
      "epoch": 0.8212147134302823,
      "grad_norm": 1.1212342977523804,
      "learning_rate": 0.0002,
      "loss": 2.2805,
      "step": 240
    },
    {
      "epoch": 0.8554319931565441,
      "grad_norm": 1.124230146408081,
      "learning_rate": 0.0002,
      "loss": 2.2378,
      "step": 250
    },
    {
      "epoch": 0.8896492728828058,
      "grad_norm": 1.0999276638031006,
      "learning_rate": 0.0002,
      "loss": 2.222,
      "step": 260
    },
    {
      "epoch": 0.9238665526090676,
      "grad_norm": 1.1538196802139282,
      "learning_rate": 0.0002,
      "loss": 2.2267,
      "step": 270
    },
    {
      "epoch": 0.9580838323353293,
      "grad_norm": 1.2899906635284424,
      "learning_rate": 0.0002,
      "loss": 2.2384,
      "step": 280
    },
    {
      "epoch": 0.9923011120615911,
      "grad_norm": 1.3248995542526245,
      "learning_rate": 0.0002,
      "loss": 2.236,
      "step": 290
    }
  ],
  "logging_steps": 10,
  "max_steps": 293,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 147,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 4.826112117351383e+17,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}