Training in progress, epoch 2, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 203456160
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:19769e701871b2260697dc643ec4aea0abcb07d9627dba6d33b1c35448ed5445
|
| 3 |
size 203456160
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 407127126
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fb487adb67e963378b094acda99d82a64ef2abc767c999372efcbae9e57b9d17
|
| 3 |
size 407127126
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7d90778219ded8bedf9ad8925e5a21e1a56e10161eb0795f8ee240c5509e0b12
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:891cad020bf7bee78efa739dc10e1e4315e34b096ed70226b38590ec81d7d418
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch":
|
| 5 |
"eval_steps": 500,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -1274,6 +1274,237 @@
|
|
| 1274 |
"learning_rate": 6.680041969810203e-06,
|
| 1275 |
"loss": 0.5927,
|
| 1276 |
"step": 1267
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1277 |
}
|
| 1278 |
],
|
| 1279 |
"logging_steps": 7,
|
|
@@ -1288,12 +1519,12 @@
|
|
| 1288 |
"should_evaluate": false,
|
| 1289 |
"should_log": false,
|
| 1290 |
"should_save": true,
|
| 1291 |
-
"should_training_stop":
|
| 1292 |
},
|
| 1293 |
"attributes": {}
|
| 1294 |
}
|
| 1295 |
},
|
| 1296 |
-
"total_flos":
|
| 1297 |
"train_batch_size": 2,
|
| 1298 |
"trial_name": null,
|
| 1299 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 2.3645320197044333,
|
| 5 |
"eval_steps": 500,
|
| 6 |
+
"global_step": 1500,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 1274 |
"learning_rate": 6.680041969810203e-06,
|
| 1275 |
"loss": 0.5927,
|
| 1276 |
"step": 1267
|
| 1277 |
+
},
|
| 1278 |
+
{
|
| 1279 |
+
"epoch": 2.0082758620689654,
|
| 1280 |
+
"grad_norm": 0.2685578763484955,
|
| 1281 |
+
"learning_rate": 6.293212171147206e-06,
|
| 1282 |
+
"loss": 0.6135,
|
| 1283 |
+
"step": 1274
|
| 1284 |
+
},
|
| 1285 |
+
{
|
| 1286 |
+
"epoch": 2.0193103448275864,
|
| 1287 |
+
"grad_norm": 0.2770611047744751,
|
| 1288 |
+
"learning_rate": 5.917166368382277e-06,
|
| 1289 |
+
"loss": 0.5407,
|
| 1290 |
+
"step": 1281
|
| 1291 |
+
},
|
| 1292 |
+
{
|
| 1293 |
+
"epoch": 2.030344827586207,
|
| 1294 |
+
"grad_norm": 0.2914046049118042,
|
| 1295 |
+
"learning_rate": 5.5519973451903405e-06,
|
| 1296 |
+
"loss": 0.543,
|
| 1297 |
+
"step": 1288
|
| 1298 |
+
},
|
| 1299 |
+
{
|
| 1300 |
+
"epoch": 2.0413793103448277,
|
| 1301 |
+
"grad_norm": 0.29394814372062683,
|
| 1302 |
+
"learning_rate": 5.197795201563743e-06,
|
| 1303 |
+
"loss": 0.5353,
|
| 1304 |
+
"step": 1295
|
| 1305 |
+
},
|
| 1306 |
+
{
|
| 1307 |
+
"epoch": 2.0524137931034483,
|
| 1308 |
+
"grad_norm": 0.28662681579589844,
|
| 1309 |
+
"learning_rate": 4.8546473315813856e-06,
|
| 1310 |
+
"loss": 0.5408,
|
| 1311 |
+
"step": 1302
|
| 1312 |
+
},
|
| 1313 |
+
{
|
| 1314 |
+
"epoch": 2.063448275862069,
|
| 1315 |
+
"grad_norm": 0.28723788261413574,
|
| 1316 |
+
"learning_rate": 4.522638401845547e-06,
|
| 1317 |
+
"loss": 0.5296,
|
| 1318 |
+
"step": 1309
|
| 1319 |
+
},
|
| 1320 |
+
{
|
| 1321 |
+
"epoch": 2.0744827586206895,
|
| 1322 |
+
"grad_norm": 0.29902586340904236,
|
| 1323 |
+
"learning_rate": 4.2018503305916775e-06,
|
| 1324 |
+
"loss": 0.5273,
|
| 1325 |
+
"step": 1316
|
| 1326 |
+
},
|
| 1327 |
+
{
|
| 1328 |
+
"epoch": 2.08551724137931,
|
| 1329 |
+
"grad_norm": 0.28761228919029236,
|
| 1330 |
+
"learning_rate": 3.892362267476313e-06,
|
| 1331 |
+
"loss": 0.5336,
|
| 1332 |
+
"step": 1323
|
| 1333 |
+
},
|
| 1334 |
+
{
|
| 1335 |
+
"epoch": 2.0965517241379312,
|
| 1336 |
+
"grad_norm": 0.2887546420097351,
|
| 1337 |
+
"learning_rate": 3.5942505740480582e-06,
|
| 1338 |
+
"loss": 0.537,
|
| 1339 |
+
"step": 1330
|
| 1340 |
+
},
|
| 1341 |
+
{
|
| 1342 |
+
"epoch": 2.107586206896552,
|
| 1343 |
+
"grad_norm": 0.28455686569213867,
|
| 1344 |
+
"learning_rate": 3.3075888049065196e-06,
|
| 1345 |
+
"loss": 0.5369,
|
| 1346 |
+
"step": 1337
|
| 1347 |
+
},
|
| 1348 |
+
{
|
| 1349 |
+
"epoch": 2.1186206896551725,
|
| 1350 |
+
"grad_norm": 0.2921008765697479,
|
| 1351 |
+
"learning_rate": 3.03244768955383e-06,
|
| 1352 |
+
"loss": 0.5376,
|
| 1353 |
+
"step": 1344
|
| 1354 |
+
},
|
| 1355 |
+
{
|
| 1356 |
+
"epoch": 2.129655172413793,
|
| 1357 |
+
"grad_norm": 0.28860318660736084,
|
| 1358 |
+
"learning_rate": 2.7688951149431595e-06,
|
| 1359 |
+
"loss": 0.5451,
|
| 1360 |
+
"step": 1351
|
| 1361 |
+
},
|
| 1362 |
+
{
|
| 1363 |
+
"epoch": 2.1406896551724137,
|
| 1364 |
+
"grad_norm": 0.29887181520462036,
|
| 1365 |
+
"learning_rate": 2.5169961087286974e-06,
|
| 1366 |
+
"loss": 0.5403,
|
| 1367 |
+
"step": 1358
|
| 1368 |
+
},
|
| 1369 |
+
{
|
| 1370 |
+
"epoch": 2.1517241379310343,
|
| 1371 |
+
"grad_norm": 0.2972882091999054,
|
| 1372 |
+
"learning_rate": 2.276812823220964e-06,
|
| 1373 |
+
"loss": 0.5478,
|
| 1374 |
+
"step": 1365
|
| 1375 |
+
},
|
| 1376 |
+
{
|
| 1377 |
+
"epoch": 2.162758620689655,
|
| 1378 |
+
"grad_norm": 0.2919308841228485,
|
| 1379 |
+
"learning_rate": 2.048404520051722e-06,
|
| 1380 |
+
"loss": 0.526,
|
| 1381 |
+
"step": 1372
|
| 1382 |
+
},
|
| 1383 |
+
{
|
| 1384 |
+
"epoch": 2.173793103448276,
|
| 1385 |
+
"grad_norm": 0.2883777320384979,
|
| 1386 |
+
"learning_rate": 1.8318275555520237e-06,
|
| 1387 |
+
"loss": 0.536,
|
| 1388 |
+
"step": 1379
|
| 1389 |
+
},
|
| 1390 |
+
{
|
| 1391 |
+
"epoch": 2.1848275862068967,
|
| 1392 |
+
"grad_norm": 0.29888492822647095,
|
| 1393 |
+
"learning_rate": 1.6271353668471655e-06,
|
| 1394 |
+
"loss": 0.5425,
|
| 1395 |
+
"step": 1386
|
| 1396 |
+
},
|
| 1397 |
+
{
|
| 1398 |
+
"epoch": 2.1958620689655173,
|
| 1399 |
+
"grad_norm": 0.2822500765323639,
|
| 1400 |
+
"learning_rate": 1.4343784586718311e-06,
|
| 1401 |
+
"loss": 0.5325,
|
| 1402 |
+
"step": 1393
|
| 1403 |
+
},
|
| 1404 |
+
{
|
| 1405 |
+
"epoch": 2.206896551724138,
|
| 1406 |
+
"grad_norm": 0.293379545211792,
|
| 1407 |
+
"learning_rate": 1.2536043909088191e-06,
|
| 1408 |
+
"loss": 0.5186,
|
| 1409 |
+
"step": 1400
|
| 1410 |
+
},
|
| 1411 |
+
{
|
| 1412 |
+
"epoch": 2.2179310344827585,
|
| 1413 |
+
"grad_norm": 0.2903384864330292,
|
| 1414 |
+
"learning_rate": 1.0848577668543802e-06,
|
| 1415 |
+
"loss": 0.5306,
|
| 1416 |
+
"step": 1407
|
| 1417 |
+
},
|
| 1418 |
+
{
|
| 1419 |
+
"epoch": 2.228965517241379,
|
| 1420 |
+
"grad_norm": 0.29725009202957153,
|
| 1421 |
+
"learning_rate": 9.281802222129765e-07,
|
| 1422 |
+
"loss": 0.5289,
|
| 1423 |
+
"step": 1414
|
| 1424 |
+
},
|
| 1425 |
+
{
|
| 1426 |
+
"epoch": 2.24,
|
| 1427 |
+
"grad_norm": 0.2786073684692383,
|
| 1428 |
+
"learning_rate": 7.836104148243484e-07,
|
| 1429 |
+
"loss": 0.5327,
|
| 1430 |
+
"step": 1421
|
| 1431 |
+
},
|
| 1432 |
+
{
|
| 1433 |
+
"epoch": 2.251034482758621,
|
| 1434 |
+
"grad_norm": 0.288795530796051,
|
| 1435 |
+
"learning_rate": 6.511840151252169e-07,
|
| 1436 |
+
"loss": 0.5236,
|
| 1437 |
+
"step": 1428
|
| 1438 |
+
},
|
| 1439 |
+
{
|
| 1440 |
+
"epoch": 2.2620689655172415,
|
| 1441 |
+
"grad_norm": 0.30113428831100464,
|
| 1442 |
+
"learning_rate": 5.309336973481683e-07,
|
| 1443 |
+
"loss": 0.5374,
|
| 1444 |
+
"step": 1435
|
| 1445 |
+
},
|
| 1446 |
+
{
|
| 1447 |
+
"epoch": 2.273103448275862,
|
| 1448 |
+
"grad_norm": 0.29817473888397217,
|
| 1449 |
+
"learning_rate": 4.228891314597694e-07,
|
| 1450 |
+
"loss": 0.5509,
|
| 1451 |
+
"step": 1442
|
| 1452 |
+
},
|
| 1453 |
+
{
|
| 1454 |
+
"epoch": 2.2841379310344827,
|
| 1455 |
+
"grad_norm": 0.2896219491958618,
|
| 1456 |
+
"learning_rate": 3.2707697583995167e-07,
|
| 1457 |
+
"loss": 0.5215,
|
| 1458 |
+
"step": 1449
|
| 1459 |
+
},
|
| 1460 |
+
{
|
| 1461 |
+
"epoch": 2.2951724137931033,
|
| 1462 |
+
"grad_norm": 0.2960655391216278,
|
| 1463 |
+
"learning_rate": 2.4352087070443895e-07,
|
| 1464 |
+
"loss": 0.5299,
|
| 1465 |
+
"step": 1456
|
| 1466 |
+
},
|
| 1467 |
+
{
|
| 1468 |
+
"epoch": 2.306206896551724,
|
| 1469 |
+
"grad_norm": 0.2985338270664215,
|
| 1470 |
+
"learning_rate": 1.7224143227190236e-07,
|
| 1471 |
+
"loss": 0.529,
|
| 1472 |
+
"step": 1463
|
| 1473 |
+
},
|
| 1474 |
+
{
|
| 1475 |
+
"epoch": 2.317241379310345,
|
| 1476 |
+
"grad_norm": 0.3047700822353363,
|
| 1477 |
+
"learning_rate": 1.132562476771959e-07,
|
| 1478 |
+
"loss": 0.5408,
|
| 1479 |
+
"step": 1470
|
| 1480 |
+
},
|
| 1481 |
+
{
|
| 1482 |
+
"epoch": 2.3282758620689656,
|
| 1483 |
+
"grad_norm": 0.2887052297592163,
|
| 1484 |
+
"learning_rate": 6.657987063200533e-08,
|
| 1485 |
+
"loss": 0.5317,
|
| 1486 |
+
"step": 1477
|
| 1487 |
+
},
|
| 1488 |
+
{
|
| 1489 |
+
"epoch": 2.3393103448275863,
|
| 1490 |
+
"grad_norm": 0.2880987524986267,
|
| 1491 |
+
"learning_rate": 3.2223817833931805e-08,
|
| 1492 |
+
"loss": 0.533,
|
| 1493 |
+
"step": 1484
|
| 1494 |
+
},
|
| 1495 |
+
{
|
| 1496 |
+
"epoch": 2.350344827586207,
|
| 1497 |
+
"grad_norm": 0.2891731262207031,
|
| 1498 |
+
"learning_rate": 1.019656612492592e-08,
|
| 1499 |
+
"loss": 0.533,
|
| 1500 |
+
"step": 1491
|
| 1501 |
+
},
|
| 1502 |
+
{
|
| 1503 |
+
"epoch": 2.3613793103448275,
|
| 1504 |
+
"grad_norm": 0.2875867784023285,
|
| 1505 |
+
"learning_rate": 5.035503997385949e-10,
|
| 1506 |
+
"loss": 0.5369,
|
| 1507 |
+
"step": 1498
|
| 1508 |
}
|
| 1509 |
],
|
| 1510 |
"logging_steps": 7,
|
|
|
|
| 1519 |
"should_evaluate": false,
|
| 1520 |
"should_log": false,
|
| 1521 |
"should_save": true,
|
| 1522 |
+
"should_training_stop": true
|
| 1523 |
},
|
| 1524 |
"attributes": {}
|
| 1525 |
}
|
| 1526 |
},
|
| 1527 |
+
"total_flos": 9.9530601136128e+17,
|
| 1528 |
"train_batch_size": 2,
|
| 1529 |
"trial_name": null,
|
| 1530 |
"trial_params": null
|