Training in progress, step 2000
Browse files
model-00001-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4976698672
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8f6a18b3b5403b88ecb1b33c9566847f51cf7d6c6ba999962e7278ea5bc012f2
|
| 3 |
size 4976698672
|
model-00002-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4999802720
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9aaaacbfd6a47040d3b27a90d1779024173c4c14393f22f74a86b31139c5fbc3
|
| 3 |
size 4999802720
|
model-00003-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4915916176
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b525d664730e8b50d6275487d6259a5afc14e3aeb2c28ddbabbbf181356776fd
|
| 3 |
size 4915916176
|
model-00004-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1168138808
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:48e867be300fa26501aba8a4d1bcdd1fd9ba2c9963f931d58c567b2d58d5efd7
|
| 3 |
size 1168138808
|
trainer_log.jsonl
CHANGED
|
@@ -180,3 +180,61 @@
|
|
| 180 |
{"current_steps": 1500, "total_steps": 3751, "eval_loss": 0.2582685649394989, "epoch": 0.3998866987686822, "percentage": 39.99, "elapsed_time": "8:01:36", "remaining_time": "12:02:44"}
|
| 181 |
{"current_steps": 1510, "total_steps": 3751, "loss": 0.2604, "lr": 7.463636707741458e-06, "epoch": 0.4025526100938068, "percentage": 40.26, "elapsed_time": "8:04:56", "remaining_time": "11:59:42"}
|
| 182 |
{"current_steps": 1520, "total_steps": 3751, "loss": 0.264, "lr": 7.423030369365175e-06, "epoch": 0.4052185214189313, "percentage": 40.52, "elapsed_time": "8:07:14", "remaining_time": "11:55:09"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 180 |
{"current_steps": 1500, "total_steps": 3751, "eval_loss": 0.2582685649394989, "epoch": 0.3998866987686822, "percentage": 39.99, "elapsed_time": "8:01:36", "remaining_time": "12:02:44"}
|
| 181 |
{"current_steps": 1510, "total_steps": 3751, "loss": 0.2604, "lr": 7.463636707741458e-06, "epoch": 0.4025526100938068, "percentage": 40.26, "elapsed_time": "8:04:56", "remaining_time": "11:59:42"}
|
| 182 |
{"current_steps": 1520, "total_steps": 3751, "loss": 0.264, "lr": 7.423030369365175e-06, "epoch": 0.4052185214189313, "percentage": 40.52, "elapsed_time": "8:07:14", "remaining_time": "11:55:09"}
|
| 183 |
+
{"current_steps": 1530, "total_steps": 3751, "loss": 0.2565, "lr": 7.382214084839993e-06, "epoch": 0.40788443274405584, "percentage": 40.79, "elapsed_time": "8:09:34", "remaining_time": "11:50:41"}
|
| 184 |
+
{"current_steps": 1540, "total_steps": 3751, "loss": 0.2638, "lr": 7.341191390738073e-06, "epoch": 0.4105503440691804, "percentage": 41.06, "elapsed_time": "8:11:52", "remaining_time": "11:46:11"}
|
| 185 |
+
{"current_steps": 1550, "total_steps": 3751, "loss": 0.2545, "lr": 7.299965841516164e-06, "epoch": 0.41321625539430495, "percentage": 41.32, "elapsed_time": "8:14:10", "remaining_time": "11:41:43"}
|
| 186 |
+
{"current_steps": 1550, "total_steps": 3751, "eval_loss": 0.25675299763679504, "epoch": 0.41321625539430495, "percentage": 41.32, "elapsed_time": "8:18:33", "remaining_time": "11:47:56"}
|
| 187 |
+
{"current_steps": 1560, "total_steps": 3751, "loss": 0.2637, "lr": 7.2585410092076154e-06, "epoch": 0.4158821667194295, "percentage": 41.59, "elapsed_time": "8:20:57", "remaining_time": "11:43:34"}
|
| 188 |
+
{"current_steps": 1570, "total_steps": 3751, "loss": 0.2489, "lr": 7.216920483112886e-06, "epoch": 0.418548078044554, "percentage": 41.86, "elapsed_time": "8:23:17", "remaining_time": "11:39:08"}
|
| 189 |
+
{"current_steps": 1580, "total_steps": 3751, "loss": 0.2545, "lr": 7.175107869488539e-06, "epoch": 0.4212139893696786, "percentage": 42.12, "elapsed_time": "8:25:35", "remaining_time": "11:34:42"}
|
| 190 |
+
{"current_steps": 1590, "total_steps": 3751, "loss": 0.2644, "lr": 7.133106791234771e-06, "epoch": 0.42387990069480314, "percentage": 42.39, "elapsed_time": "8:27:55", "remaining_time": "11:30:20"}
|
| 191 |
+
{"current_steps": 1600, "total_steps": 3751, "loss": 0.2618, "lr": 7.090920887581507e-06, "epoch": 0.4265458120199277, "percentage": 42.66, "elapsed_time": "8:30:19", "remaining_time": "11:26:03"}
|
| 192 |
+
{"current_steps": 1600, "total_steps": 3751, "eval_loss": 0.25225213170051575, "epoch": 0.4265458120199277, "percentage": 42.66, "elapsed_time": "8:34:42", "remaining_time": "11:31:57"}
|
| 193 |
+
{"current_steps": 1610, "total_steps": 3751, "loss": 0.246, "lr": 7.048553813773075e-06, "epoch": 0.42921172334505225, "percentage": 42.92, "elapsed_time": "8:37:06", "remaining_time": "11:27:39"}
|
| 194 |
+
{"current_steps": 1620, "total_steps": 3751, "loss": 0.248, "lr": 7.006009240751488e-06, "epoch": 0.43187763467017676, "percentage": 43.19, "elapsed_time": "8:39:24", "remaining_time": "11:23:15"}
|
| 195 |
+
{"current_steps": 1630, "total_steps": 3751, "loss": 0.2592, "lr": 6.963290854838376e-06, "epoch": 0.4345435459953013, "percentage": 43.46, "elapsed_time": "8:41:42", "remaining_time": "11:18:52"}
|
| 196 |
+
{"current_steps": 1640, "total_steps": 3751, "loss": 0.2502, "lr": 6.920402357415582e-06, "epoch": 0.4372094573204259, "percentage": 43.72, "elapsed_time": "8:44:03", "remaining_time": "11:14:33"}
|
| 197 |
+
{"current_steps": 1650, "total_steps": 3751, "loss": 0.2615, "lr": 6.877347464604446e-06, "epoch": 0.43987536864555044, "percentage": 43.99, "elapsed_time": "8:46:23", "remaining_time": "11:10:16"}
|
| 198 |
+
{"current_steps": 1650, "total_steps": 3751, "eval_loss": 0.2549818754196167, "epoch": 0.43987536864555044, "percentage": 43.99, "elapsed_time": "8:50:47", "remaining_time": "11:15:51"}
|
| 199 |
+
{"current_steps": 1660, "total_steps": 3751, "loss": 0.2468, "lr": 6.834129906943822e-06, "epoch": 0.442541279970675, "percentage": 44.25, "elapsed_time": "8:53:04", "remaining_time": "11:11:29"}
|
| 200 |
+
{"current_steps": 1670, "total_steps": 3751, "loss": 0.2522, "lr": 6.790753429066838e-06, "epoch": 0.4452071912957995, "percentage": 44.52, "elapsed_time": "8:55:21", "remaining_time": "11:07:06"}
|
| 201 |
+
{"current_steps": 1680, "total_steps": 3751, "loss": 0.2468, "lr": 6.7472217893764465e-06, "epoch": 0.44787310262092406, "percentage": 44.79, "elapsed_time": "8:57:35", "remaining_time": "11:02:42"}
|
| 202 |
+
{"current_steps": 1690, "total_steps": 3751, "loss": 0.2525, "lr": 6.70353875971976e-06, "epoch": 0.4505390139460486, "percentage": 45.05, "elapsed_time": "8:59:56", "remaining_time": "10:58:28"}
|
| 203 |
+
{"current_steps": 1700, "total_steps": 3751, "loss": 0.2512, "lr": 6.659708125061242e-06, "epoch": 0.4532049252711732, "percentage": 45.32, "elapsed_time": "9:02:16", "remaining_time": "10:54:14"}
|
| 204 |
+
{"current_steps": 1700, "total_steps": 3751, "eval_loss": 0.24882382154464722, "epoch": 0.4532049252711732, "percentage": 45.32, "elapsed_time": "9:06:39", "remaining_time": "10:59:31"}
|
| 205 |
+
{"current_steps": 1710, "total_steps": 3751, "loss": 0.2522, "lr": 6.615733683154762e-06, "epoch": 0.45587083659629773, "percentage": 45.59, "elapsed_time": "9:09:01", "remaining_time": "10:55:17"}
|
| 206 |
+
{"current_steps": 1720, "total_steps": 3751, "loss": 0.2505, "lr": 6.571619244214521e-06, "epoch": 0.45853674792142224, "percentage": 45.85, "elapsed_time": "9:11:22", "remaining_time": "10:51:03"}
|
| 207 |
+
{"current_steps": 1730, "total_steps": 3751, "loss": 0.2596, "lr": 6.527368630584919e-06, "epoch": 0.4612026592465468, "percentage": 46.12, "elapsed_time": "9:13:40", "remaining_time": "10:46:48"}
|
| 208 |
+
{"current_steps": 1740, "total_steps": 3751, "loss": 0.2494, "lr": 6.482985676409368e-06, "epoch": 0.46386857057167136, "percentage": 46.39, "elapsed_time": "9:15:57", "remaining_time": "10:42:32"}
|
| 209 |
+
{"current_steps": 1750, "total_steps": 3751, "loss": 0.245, "lr": 6.438474227298065e-06, "epoch": 0.4665344818967959, "percentage": 46.65, "elapsed_time": "9:18:14", "remaining_time": "10:38:18"}
|
| 210 |
+
{"current_steps": 1750, "total_steps": 3751, "eval_loss": 0.25038692355155945, "epoch": 0.4665344818967959, "percentage": 46.65, "elapsed_time": "9:22:36", "remaining_time": "10:43:18"}
|
| 211 |
+
{"current_steps": 1760, "total_steps": 3751, "loss": 0.2595, "lr": 6.393838139994797e-06, "epoch": 0.4692003932219205, "percentage": 46.92, "elapsed_time": "9:24:58", "remaining_time": "10:39:07"}
|
| 212 |
+
{"current_steps": 1770, "total_steps": 3751, "loss": 0.2516, "lr": 6.349081282042768e-06, "epoch": 0.471866304547045, "percentage": 47.19, "elapsed_time": "9:27:16", "remaining_time": "10:34:53"}
|
| 213 |
+
{"current_steps": 1780, "total_steps": 3751, "loss": 0.2469, "lr": 6.304207531449486e-06, "epoch": 0.47453221587216954, "percentage": 47.45, "elapsed_time": "9:29:35", "remaining_time": "10:30:42"}
|
| 214 |
+
{"current_steps": 1790, "total_steps": 3751, "loss": 0.2354, "lr": 6.259220776350746e-06, "epoch": 0.4771981271972941, "percentage": 47.72, "elapsed_time": "9:31:59", "remaining_time": "10:26:37"}
|
| 215 |
+
{"current_steps": 1800, "total_steps": 3751, "loss": 0.2503, "lr": 6.2141249146737545e-06, "epoch": 0.47986403852241866, "percentage": 47.99, "elapsed_time": "9:34:19", "remaining_time": "10:22:30"}
|
| 216 |
+
{"current_steps": 1800, "total_steps": 3751, "eval_loss": 0.24808603525161743, "epoch": 0.47986403852241866, "percentage": 47.99, "elapsed_time": "9:38:42", "remaining_time": "10:27:15"}
|
| 217 |
+
{"current_steps": 1810, "total_steps": 3751, "loss": 0.2466, "lr": 6.168923853799369e-06, "epoch": 0.4825299498475432, "percentage": 48.25, "elapsed_time": "9:41:02", "remaining_time": "10:23:06"}
|
| 218 |
+
{"current_steps": 1820, "total_steps": 3751, "loss": 0.2467, "lr": 6.123621510223552e-06, "epoch": 0.4851958611726677, "percentage": 48.52, "elapsed_time": "9:43:21", "remaining_time": "10:18:56"}
|
| 219 |
+
{"current_steps": 1830, "total_steps": 3751, "loss": 0.2523, "lr": 6.0782218092180164e-06, "epoch": 0.4878617724977923, "percentage": 48.79, "elapsed_time": "9:45:40", "remaining_time": "10:14:47"}
|
| 220 |
+
{"current_steps": 1840, "total_steps": 3751, "loss": 0.2428, "lr": 6.032728684490118e-06, "epoch": 0.49052768382291684, "percentage": 49.05, "elapsed_time": "9:47:56", "remaining_time": "10:10:37"}
|
| 221 |
+
{"current_steps": 1850, "total_steps": 3751, "loss": 0.2402, "lr": 5.987146077842015e-06, "epoch": 0.4931935951480414, "percentage": 49.32, "elapsed_time": "9:50:14", "remaining_time": "10:06:31"}
|
| 222 |
+
{"current_steps": 1850, "total_steps": 3751, "eval_loss": 0.24496783316135406, "epoch": 0.4931935951480414, "percentage": 49.32, "elapsed_time": "9:54:37", "remaining_time": "10:11:01"}
|
| 223 |
+
{"current_steps": 1860, "total_steps": 3751, "loss": 0.2526, "lr": 5.941477938829126e-06, "epoch": 0.49585950647316596, "percentage": 49.59, "elapsed_time": "9:56:55", "remaining_time": "10:06:52"}
|
| 224 |
+
{"current_steps": 1870, "total_steps": 3751, "loss": 0.2462, "lr": 5.8957282244179125e-06, "epoch": 0.49852541779829046, "percentage": 49.85, "elapsed_time": "9:59:17", "remaining_time": "10:02:49"}
|
| 225 |
+
{"current_steps": 1880, "total_steps": 3751, "loss": 0.2343, "lr": 5.84990089864303e-06, "epoch": 0.501191329123415, "percentage": 50.12, "elapsed_time": "10:01:37", "remaining_time": "9:58:44"}
|
| 226 |
+
{"current_steps": 1890, "total_steps": 3751, "loss": 0.2403, "lr": 5.803999932263859e-06, "epoch": 0.5038572404485396, "percentage": 50.39, "elapsed_time": "10:03:57", "remaining_time": "9:54:41"}
|
| 227 |
+
{"current_steps": 1900, "total_steps": 3751, "loss": 0.2346, "lr": 5.7580293024204455e-06, "epoch": 0.5065231517736641, "percentage": 50.65, "elapsed_time": "10:06:13", "remaining_time": "9:50:35"}
|
| 228 |
+
{"current_steps": 1900, "total_steps": 3751, "eval_loss": 0.24397991597652435, "epoch": 0.5065231517736641, "percentage": 50.65, "elapsed_time": "10:10:36", "remaining_time": "9:54:51"}
|
| 229 |
+
{"current_steps": 1910, "total_steps": 3751, "loss": 0.251, "lr": 5.7119929922889065e-06, "epoch": 0.5091890630987886, "percentage": 50.92, "elapsed_time": "10:12:57", "remaining_time": "9:50:49"}
|
| 230 |
+
{"current_steps": 1920, "total_steps": 3751, "loss": 0.2443, "lr": 5.665894990736301e-06, "epoch": 0.5118549744239133, "percentage": 51.19, "elapsed_time": "10:15:19", "remaining_time": "9:46:47"}
|
| 231 |
+
{"current_steps": 1930, "total_steps": 3751, "loss": 0.2492, "lr": 5.6197392919750095e-06, "epoch": 0.5145208857490378, "percentage": 51.45, "elapsed_time": "10:17:37", "remaining_time": "9:42:44"}
|
| 232 |
+
{"current_steps": 1940, "total_steps": 3751, "loss": 0.2472, "lr": 5.573529895216648e-06, "epoch": 0.5171867970741624, "percentage": 51.72, "elapsed_time": "10:19:57", "remaining_time": "9:38:43"}
|
| 233 |
+
{"current_steps": 1950, "total_steps": 3751, "loss": 0.2413, "lr": 5.5272708043255605e-06, "epoch": 0.5198527083992869, "percentage": 51.99, "elapsed_time": "10:22:16", "remaining_time": "9:34:43"}
|
| 234 |
+
{"current_steps": 1950, "total_steps": 3751, "eval_loss": 0.24250540137290955, "epoch": 0.5198527083992869, "percentage": 51.99, "elapsed_time": "10:26:39", "remaining_time": "9:38:46"}
|
| 235 |
+
{"current_steps": 1960, "total_steps": 3751, "loss": 0.237, "lr": 5.480966027471889e-06, "epoch": 0.5225186197244114, "percentage": 52.25, "elapsed_time": "10:28:59", "remaining_time": "9:34:45"}
|
| 236 |
+
{"current_steps": 1970, "total_steps": 3751, "loss": 0.2449, "lr": 5.434619576784288e-06, "epoch": 0.525184531049536, "percentage": 52.52, "elapsed_time": "10:31:19", "remaining_time": "9:30:45"}
|
| 237 |
+
{"current_steps": 1980, "total_steps": 3751, "loss": 0.2237, "lr": 5.388235468002286e-06, "epoch": 0.5278504423746605, "percentage": 52.79, "elapsed_time": "10:33:38", "remaining_time": "9:26:45"}
|
| 238 |
+
{"current_steps": 1990, "total_steps": 3751, "loss": 0.2454, "lr": 5.341817720128344e-06, "epoch": 0.5305163536997851, "percentage": 53.05, "elapsed_time": "10:35:55", "remaining_time": "9:22:44"}
|
| 239 |
+
{"current_steps": 2000, "total_steps": 3751, "loss": 0.24, "lr": 5.295370355079615e-06, "epoch": 0.5331822650249096, "percentage": 53.32, "elapsed_time": "10:38:17", "remaining_time": "9:18:49"}
|
| 240 |
+
{"current_steps": 2000, "total_steps": 3751, "eval_loss": 0.2383483648300171, "epoch": 0.5331822650249096, "percentage": 53.32, "elapsed_time": "10:42:40", "remaining_time": "9:22:39"}
|