aleegis commited on
Commit
c2e8d6c
·
verified ·
1 Parent(s): 7480b63

Training in progress, step 2400, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fe945d679deecbd7e8f1bf3f6218e8071999cc48891a5cc26c87950cbf8ce49b
3
  size 101752088
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f09644560ff21c0cac0dc280c2e5a17b51797de861561d39016c4a93cb2ecfc
3
  size 101752088
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fbd4f0c6005622c69470043161d8ddd8639be3e6da6538cc0549895d966884a8
3
  size 203719079
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f5fbc165c90fc400193733f2f4ec0de9bb66ad7ce5d2623ea31301c3c843aa8
3
  size 203719079
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b980a66b6e57f076f69807f45a48b45a7f943415f47c6d25a6395314cf209e0d
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d89f11a61cdbf936e1922f1f4c1975dcbb372c36c7df8bb6932c03cf9e94d69c
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bb0f627823758704b99c0100536ff12ee5c39e3502f9f5e2784e258e698a30f7
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a55f3acde6dde848ed30d866c5a5ff7da6af4cd03cbfab7f1dfd0c0dc9eb6a4
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.33264691905591637,
6
  "eval_steps": 500,
7
- "global_step": 2100,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1478,6 +1478,216 @@
1478
  "learning_rate": 2.505040022749265e-05,
1479
  "loss": 1.2165,
1480
  "step": 2100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1481
  }
1482
  ],
1483
  "logging_steps": 10,
@@ -1497,7 +1707,7 @@
1497
  "attributes": {}
1498
  }
1499
  },
1500
- "total_flos": 6.91465321709568e+17,
1501
  "train_batch_size": 16,
1502
  "trial_name": null,
1503
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.38016790749247587,
6
  "eval_steps": 500,
7
+ "global_step": 2400,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1478
  "learning_rate": 2.505040022749265e-05,
1479
  "loss": 1.2165,
1480
  "step": 2100
1481
+ },
1482
+ {
1483
+ "epoch": 0.3342309520038017,
1484
+ "grad_norm": 1.0949828624725342,
1485
+ "learning_rate": 2.4547929212481435e-05,
1486
+ "loss": 1.2863,
1487
+ "step": 2110
1488
+ },
1489
+ {
1490
+ "epoch": 0.335814984951687,
1491
+ "grad_norm": 0.6609100103378296,
1492
+ "learning_rate": 2.404890400043023e-05,
1493
+ "loss": 1.2751,
1494
+ "step": 2120
1495
+ },
1496
+ {
1497
+ "epoch": 0.33739901789957233,
1498
+ "grad_norm": 0.7404264807701111,
1499
+ "learning_rate": 2.3553392151364536e-05,
1500
+ "loss": 1.3004,
1501
+ "step": 2130
1502
+ },
1503
+ {
1504
+ "epoch": 0.3389830508474576,
1505
+ "grad_norm": 0.8399338722229004,
1506
+ "learning_rate": 2.3061460749656844e-05,
1507
+ "loss": 1.4249,
1508
+ "step": 2140
1509
+ },
1510
+ {
1511
+ "epoch": 0.34056708379534295,
1512
+ "grad_norm": 0.9257438778877258,
1513
+ "learning_rate": 2.2573176394944328e-05,
1514
+ "loss": 1.4161,
1515
+ "step": 2150
1516
+ },
1517
+ {
1518
+ "epoch": 0.34215111674322823,
1519
+ "grad_norm": 0.8689798712730408,
1520
+ "learning_rate": 2.2088605193112383e-05,
1521
+ "loss": 1.2699,
1522
+ "step": 2160
1523
+ },
1524
+ {
1525
+ "epoch": 0.3437351496911136,
1526
+ "grad_norm": 0.7370318174362183,
1527
+ "learning_rate": 2.160781274734495e-05,
1528
+ "loss": 1.3336,
1529
+ "step": 2170
1530
+ },
1531
+ {
1532
+ "epoch": 0.3453191826389989,
1533
+ "grad_norm": 0.7378177046775818,
1534
+ "learning_rate": 2.1130864149242878e-05,
1535
+ "loss": 1.3478,
1536
+ "step": 2180
1537
+ },
1538
+ {
1539
+ "epoch": 0.3469032155868842,
1540
+ "grad_norm": 0.7509024143218994,
1541
+ "learning_rate": 2.0657823970011618e-05,
1542
+ "loss": 1.3091,
1543
+ "step": 2190
1544
+ },
1545
+ {
1546
+ "epoch": 0.34848724853476953,
1547
+ "grad_norm": 0.7333298921585083,
1548
+ "learning_rate": 2.0188756251719203e-05,
1549
+ "loss": 1.3382,
1550
+ "step": 2200
1551
+ },
1552
+ {
1553
+ "epoch": 0.3500712814826548,
1554
+ "grad_norm": 0.729038655757904,
1555
+ "learning_rate": 1.9723724498626105e-05,
1556
+ "loss": 1.2599,
1557
+ "step": 2210
1558
+ },
1559
+ {
1560
+ "epoch": 0.35165531443054016,
1561
+ "grad_norm": 1.0300594568252563,
1562
+ "learning_rate": 1.9262791668587676e-05,
1563
+ "loss": 1.332,
1564
+ "step": 2220
1565
+ },
1566
+ {
1567
+ "epoch": 0.3532393473784255,
1568
+ "grad_norm": 1.1736242771148682,
1569
+ "learning_rate": 1.8806020164530702e-05,
1570
+ "loss": 1.2849,
1571
+ "step": 2230
1572
+ },
1573
+ {
1574
+ "epoch": 0.3548233803263108,
1575
+ "grad_norm": 0.758115828037262,
1576
+ "learning_rate": 1.8353471826005036e-05,
1577
+ "loss": 1.4054,
1578
+ "step": 2240
1579
+ },
1580
+ {
1581
+ "epoch": 0.3564074132741961,
1582
+ "grad_norm": 0.9238961935043335,
1583
+ "learning_rate": 1.7905207920811572e-05,
1584
+ "loss": 1.4045,
1585
+ "step": 2250
1586
+ },
1587
+ {
1588
+ "epoch": 0.3579914462220814,
1589
+ "grad_norm": 0.8140641450881958,
1590
+ "learning_rate": 1.746128913670746e-05,
1591
+ "loss": 1.3212,
1592
+ "step": 2260
1593
+ },
1594
+ {
1595
+ "epoch": 0.35957547916996674,
1596
+ "grad_norm": 0.690086305141449,
1597
+ "learning_rate": 1.7021775573190013e-05,
1598
+ "loss": 1.3071,
1599
+ "step": 2270
1600
+ },
1601
+ {
1602
+ "epoch": 0.3611595121178521,
1603
+ "grad_norm": 0.7119179368019104,
1604
+ "learning_rate": 1.6586726733360237e-05,
1605
+ "loss": 1.1963,
1606
+ "step": 2280
1607
+ },
1608
+ {
1609
+ "epoch": 0.36274354506573736,
1610
+ "grad_norm": 0.90193772315979,
1611
+ "learning_rate": 1.615620151586697e-05,
1612
+ "loss": 1.3169,
1613
+ "step": 2290
1614
+ },
1615
+ {
1616
+ "epoch": 0.3643275780136227,
1617
+ "grad_norm": 0.791403591632843,
1618
+ "learning_rate": 1.5730258206933025e-05,
1619
+ "loss": 1.3956,
1620
+ "step": 2300
1621
+ },
1622
+ {
1623
+ "epoch": 0.365911610961508,
1624
+ "grad_norm": 0.9166008234024048,
1625
+ "learning_rate": 1.530895447246411e-05,
1626
+ "loss": 1.2468,
1627
+ "step": 2310
1628
+ },
1629
+ {
1630
+ "epoch": 0.3674956439093933,
1631
+ "grad_norm": 0.8105589151382446,
1632
+ "learning_rate": 1.4892347350241881e-05,
1633
+ "loss": 1.2856,
1634
+ "step": 2320
1635
+ },
1636
+ {
1637
+ "epoch": 0.3690796768572786,
1638
+ "grad_norm": 0.7731050252914429,
1639
+ "learning_rate": 1.448049324220181e-05,
1640
+ "loss": 1.32,
1641
+ "step": 2330
1642
+ },
1643
+ {
1644
+ "epoch": 0.37066370980516394,
1645
+ "grad_norm": 0.6832409501075745,
1646
+ "learning_rate": 1.4073447906797376e-05,
1647
+ "loss": 1.4165,
1648
+ "step": 2340
1649
+ },
1650
+ {
1651
+ "epoch": 0.3722477427530493,
1652
+ "grad_norm": 0.7237154841423035,
1653
+ "learning_rate": 1.367126645145121e-05,
1654
+ "loss": 1.3432,
1655
+ "step": 2350
1656
+ },
1657
+ {
1658
+ "epoch": 0.37383177570093457,
1659
+ "grad_norm": 0.8293668031692505,
1660
+ "learning_rate": 1.327400332509442e-05,
1661
+ "loss": 1.3723,
1662
+ "step": 2360
1663
+ },
1664
+ {
1665
+ "epoch": 0.3754158086488199,
1666
+ "grad_norm": 0.9197924733161926,
1667
+ "learning_rate": 1.2881712310795118e-05,
1668
+ "loss": 1.2893,
1669
+ "step": 2370
1670
+ },
1671
+ {
1672
+ "epoch": 0.3769998415967052,
1673
+ "grad_norm": 0.725936233997345,
1674
+ "learning_rate": 1.2494446518477022e-05,
1675
+ "loss": 1.2668,
1676
+ "step": 2380
1677
+ },
1678
+ {
1679
+ "epoch": 0.3785838745445905,
1680
+ "grad_norm": 0.768979012966156,
1681
+ "learning_rate": 1.2112258377729274e-05,
1682
+ "loss": 1.347,
1683
+ "step": 2390
1684
+ },
1685
+ {
1686
+ "epoch": 0.38016790749247587,
1687
+ "grad_norm": 0.7712375521659851,
1688
+ "learning_rate": 1.1735199630708222e-05,
1689
+ "loss": 1.2925,
1690
+ "step": 2400
1691
  }
1692
  ],
1693
  "logging_steps": 10,
 
1707
  "attributes": {}
1708
  }
1709
  },
1710
+ "total_flos": 7.90246081953792e+17,
1711
  "train_batch_size": 16,
1712
  "trial_name": null,
1713
  "trial_params": null