Training in progress, step 200, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 335604696
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:952c9588ca8aab9a3be93cbf5f2390f8fc788735e21cc497a1d52b1de2e9556e
|
| 3 |
size 335604696
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 671466706
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:19fc9eff172015d6eb14d20311692a4aaec89528931721e73b51b6692e7b1230
|
| 3 |
size 671466706
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:370731f4eccf431eaed7fd7c80966de8cf9c9e7f5b12ce8e3b2169241d34df78
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2188426f4adaa3a6124c181a61860e824dc2136981845a9e9c4162d0d3700af6
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 50,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -1348,6 +1348,105 @@
|
|
| 1348 |
"learning_rate": 1.9222451411073645e-06,
|
| 1349 |
"loss": 0.8576,
|
| 1350 |
"step": 187
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1351 |
}
|
| 1352 |
],
|
| 1353 |
"logging_steps": 1,
|
|
@@ -1362,12 +1461,12 @@
|
|
| 1362 |
"should_evaluate": false,
|
| 1363 |
"should_log": false,
|
| 1364 |
"should_save": true,
|
| 1365 |
-
"should_training_stop":
|
| 1366 |
},
|
| 1367 |
"attributes": {}
|
| 1368 |
}
|
| 1369 |
},
|
| 1370 |
-
"total_flos": 1.
|
| 1371 |
"train_batch_size": 2,
|
| 1372 |
"trial_name": null,
|
| 1373 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 0.2705559078418939,
|
| 5 |
"eval_steps": 50,
|
| 6 |
+
"global_step": 200,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 1348 |
"learning_rate": 1.9222451411073645e-06,
|
| 1349 |
"loss": 0.8576,
|
| 1350 |
"step": 187
|
| 1351 |
+
},
|
| 1352 |
+
{
|
| 1353 |
+
"epoch": 0.25432255337138027,
|
| 1354 |
+
"grad_norm": 0.17826685309410095,
|
| 1355 |
+
"learning_rate": 1.6389299449645733e-06,
|
| 1356 |
+
"loss": 0.8168,
|
| 1357 |
+
"step": 188
|
| 1358 |
+
},
|
| 1359 |
+
{
|
| 1360 |
+
"epoch": 0.25567533291058975,
|
| 1361 |
+
"grad_norm": 0.17930451035499573,
|
| 1362 |
+
"learning_rate": 1.3779612414252017e-06,
|
| 1363 |
+
"loss": 0.8579,
|
| 1364 |
+
"step": 189
|
| 1365 |
+
},
|
| 1366 |
+
{
|
| 1367 |
+
"epoch": 0.2570281124497992,
|
| 1368 |
+
"grad_norm": 0.1737329512834549,
|
| 1369 |
+
"learning_rate": 1.1394185240843983e-06,
|
| 1370 |
+
"loss": 0.8247,
|
| 1371 |
+
"step": 190
|
| 1372 |
+
},
|
| 1373 |
+
{
|
| 1374 |
+
"epoch": 0.25838089198900865,
|
| 1375 |
+
"grad_norm": 0.17530955374240875,
|
| 1376 |
+
"learning_rate": 9.233744553646754e-07,
|
| 1377 |
+
"loss": 0.8438,
|
| 1378 |
+
"step": 191
|
| 1379 |
+
},
|
| 1380 |
+
{
|
| 1381 |
+
"epoch": 0.25973367152821814,
|
| 1382 |
+
"grad_norm": 0.18335498869419098,
|
| 1383 |
+
"learning_rate": 7.298948443822228e-07,
|
| 1384 |
+
"loss": 0.8925,
|
| 1385 |
+
"step": 192
|
| 1386 |
+
},
|
| 1387 |
+
{
|
| 1388 |
+
"epoch": 0.2610864510674276,
|
| 1389 |
+
"grad_norm": 0.17108893394470215,
|
| 1390 |
+
"learning_rate": 5.590386269008512e-07,
|
| 1391 |
+
"loss": 0.8095,
|
| 1392 |
+
"step": 193
|
| 1393 |
+
},
|
| 1394 |
+
{
|
| 1395 |
+
"epoch": 0.2624392306066371,
|
| 1396 |
+
"grad_norm": 0.18680942058563232,
|
| 1397 |
+
"learning_rate": 4.108578473795032e-07,
|
| 1398 |
+
"loss": 0.8519,
|
| 1399 |
+
"step": 194
|
| 1400 |
+
},
|
| 1401 |
+
{
|
| 1402 |
+
"epoch": 0.2637920101458465,
|
| 1403 |
+
"grad_norm": 0.17714115977287292,
|
| 1404 |
+
"learning_rate": 2.8539764311908407e-07,
|
| 1405 |
+
"loss": 0.8445,
|
| 1406 |
+
"step": 195
|
| 1407 |
+
},
|
| 1408 |
+
{
|
| 1409 |
+
"epoch": 0.265144789685056,
|
| 1410 |
+
"grad_norm": 0.17967358231544495,
|
| 1411 |
+
"learning_rate": 1.8269623051318515e-07,
|
| 1412 |
+
"loss": 0.8631,
|
| 1413 |
+
"step": 196
|
| 1414 |
+
},
|
| 1415 |
+
{
|
| 1416 |
+
"epoch": 0.2664975692242655,
|
| 1417 |
+
"grad_norm": 0.17863860726356506,
|
| 1418 |
+
"learning_rate": 1.027848934069625e-07,
|
| 1419 |
+
"loss": 0.8315,
|
| 1420 |
+
"step": 197
|
| 1421 |
+
},
|
| 1422 |
+
{
|
| 1423 |
+
"epoch": 0.26785034876347497,
|
| 1424 |
+
"grad_norm": 0.1744745522737503,
|
| 1425 |
+
"learning_rate": 4.568797356781784e-08,
|
| 1426 |
+
"loss": 0.805,
|
| 1427 |
+
"step": 198
|
| 1428 |
+
},
|
| 1429 |
+
{
|
| 1430 |
+
"epoch": 0.26920312830268445,
|
| 1431 |
+
"grad_norm": 0.1872616410255432,
|
| 1432 |
+
"learning_rate": 1.142286327065478e-08,
|
| 1433 |
+
"loss": 0.8821,
|
| 1434 |
+
"step": 199
|
| 1435 |
+
},
|
| 1436 |
+
{
|
| 1437 |
+
"epoch": 0.2705559078418939,
|
| 1438 |
+
"grad_norm": 0.18084542453289032,
|
| 1439 |
+
"learning_rate": 0.0,
|
| 1440 |
+
"loss": 0.8613,
|
| 1441 |
+
"step": 200
|
| 1442 |
+
},
|
| 1443 |
+
{
|
| 1444 |
+
"epoch": 0.2705559078418939,
|
| 1445 |
+
"eval_loss": 0.842223584651947,
|
| 1446 |
+
"eval_runtime": 438.0922,
|
| 1447 |
+
"eval_samples_per_second": 5.684,
|
| 1448 |
+
"eval_steps_per_second": 2.842,
|
| 1449 |
+
"step": 200
|
| 1450 |
}
|
| 1451 |
],
|
| 1452 |
"logging_steps": 1,
|
|
|
|
| 1461 |
"should_evaluate": false,
|
| 1462 |
"should_log": false,
|
| 1463 |
"should_save": true,
|
| 1464 |
+
"should_training_stop": true
|
| 1465 |
},
|
| 1466 |
"attributes": {}
|
| 1467 |
}
|
| 1468 |
},
|
| 1469 |
+
"total_flos": 1.2008903253719777e+18,
|
| 1470 |
"train_batch_size": 2,
|
| 1471 |
"trial_name": null,
|
| 1472 |
"trial_params": null
|