izzcw commited on
Commit
8fc6861
·
verified ·
1 Parent(s): b898c21

Training in progress, step 2000

Browse files
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ba120eb3a6be8279cf4ceae51e196e87f5137c5b61203f90835697cf0d9e9940
3
  size 4976698672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f6a18b3b5403b88ecb1b33c9566847f51cf7d6c6ba999962e7278ea5bc012f2
3
  size 4976698672
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:358b31816e477d0e0ec409a53e8807a908db72615bfc8df56d3aa0987fcf8cba
3
  size 4999802720
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9aaaacbfd6a47040d3b27a90d1779024173c4c14393f22f74a86b31139c5fbc3
3
  size 4999802720
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2eefe1427d92bd58ea6d35c93566e4144bd660de3458fa497b58f0d505ff6ba4
3
  size 4915916176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b525d664730e8b50d6275487d6259a5afc14e3aeb2c28ddbabbbf181356776fd
3
  size 4915916176
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d889cead7b26dcbcb7e7b1c77e716c943ccc2114e3da06eebd22b2c37fc61110
3
  size 1168138808
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48e867be300fa26501aba8a4d1bcdd1fd9ba2c9963f931d58c567b2d58d5efd7
3
  size 1168138808
trainer_log.jsonl CHANGED
@@ -180,3 +180,61 @@
180
  {"current_steps": 1500, "total_steps": 3751, "eval_loss": 0.2582685649394989, "epoch": 0.3998866987686822, "percentage": 39.99, "elapsed_time": "8:01:36", "remaining_time": "12:02:44"}
181
  {"current_steps": 1510, "total_steps": 3751, "loss": 0.2604, "lr": 7.463636707741458e-06, "epoch": 0.4025526100938068, "percentage": 40.26, "elapsed_time": "8:04:56", "remaining_time": "11:59:42"}
182
  {"current_steps": 1520, "total_steps": 3751, "loss": 0.264, "lr": 7.423030369365175e-06, "epoch": 0.4052185214189313, "percentage": 40.52, "elapsed_time": "8:07:14", "remaining_time": "11:55:09"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180
  {"current_steps": 1500, "total_steps": 3751, "eval_loss": 0.2582685649394989, "epoch": 0.3998866987686822, "percentage": 39.99, "elapsed_time": "8:01:36", "remaining_time": "12:02:44"}
181
  {"current_steps": 1510, "total_steps": 3751, "loss": 0.2604, "lr": 7.463636707741458e-06, "epoch": 0.4025526100938068, "percentage": 40.26, "elapsed_time": "8:04:56", "remaining_time": "11:59:42"}
182
  {"current_steps": 1520, "total_steps": 3751, "loss": 0.264, "lr": 7.423030369365175e-06, "epoch": 0.4052185214189313, "percentage": 40.52, "elapsed_time": "8:07:14", "remaining_time": "11:55:09"}
183
+ {"current_steps": 1530, "total_steps": 3751, "loss": 0.2565, "lr": 7.382214084839993e-06, "epoch": 0.40788443274405584, "percentage": 40.79, "elapsed_time": "8:09:34", "remaining_time": "11:50:41"}
184
+ {"current_steps": 1540, "total_steps": 3751, "loss": 0.2638, "lr": 7.341191390738073e-06, "epoch": 0.4105503440691804, "percentage": 41.06, "elapsed_time": "8:11:52", "remaining_time": "11:46:11"}
185
+ {"current_steps": 1550, "total_steps": 3751, "loss": 0.2545, "lr": 7.299965841516164e-06, "epoch": 0.41321625539430495, "percentage": 41.32, "elapsed_time": "8:14:10", "remaining_time": "11:41:43"}
186
+ {"current_steps": 1550, "total_steps": 3751, "eval_loss": 0.25675299763679504, "epoch": 0.41321625539430495, "percentage": 41.32, "elapsed_time": "8:18:33", "remaining_time": "11:47:56"}
187
+ {"current_steps": 1560, "total_steps": 3751, "loss": 0.2637, "lr": 7.2585410092076154e-06, "epoch": 0.4158821667194295, "percentage": 41.59, "elapsed_time": "8:20:57", "remaining_time": "11:43:34"}
188
+ {"current_steps": 1570, "total_steps": 3751, "loss": 0.2489, "lr": 7.216920483112886e-06, "epoch": 0.418548078044554, "percentage": 41.86, "elapsed_time": "8:23:17", "remaining_time": "11:39:08"}
189
+ {"current_steps": 1580, "total_steps": 3751, "loss": 0.2545, "lr": 7.175107869488539e-06, "epoch": 0.4212139893696786, "percentage": 42.12, "elapsed_time": "8:25:35", "remaining_time": "11:34:42"}
190
+ {"current_steps": 1590, "total_steps": 3751, "loss": 0.2644, "lr": 7.133106791234771e-06, "epoch": 0.42387990069480314, "percentage": 42.39, "elapsed_time": "8:27:55", "remaining_time": "11:30:20"}
191
+ {"current_steps": 1600, "total_steps": 3751, "loss": 0.2618, "lr": 7.090920887581507e-06, "epoch": 0.4265458120199277, "percentage": 42.66, "elapsed_time": "8:30:19", "remaining_time": "11:26:03"}
192
+ {"current_steps": 1600, "total_steps": 3751, "eval_loss": 0.25225213170051575, "epoch": 0.4265458120199277, "percentage": 42.66, "elapsed_time": "8:34:42", "remaining_time": "11:31:57"}
193
+ {"current_steps": 1610, "total_steps": 3751, "loss": 0.246, "lr": 7.048553813773075e-06, "epoch": 0.42921172334505225, "percentage": 42.92, "elapsed_time": "8:37:06", "remaining_time": "11:27:39"}
194
+ {"current_steps": 1620, "total_steps": 3751, "loss": 0.248, "lr": 7.006009240751488e-06, "epoch": 0.43187763467017676, "percentage": 43.19, "elapsed_time": "8:39:24", "remaining_time": "11:23:15"}
195
+ {"current_steps": 1630, "total_steps": 3751, "loss": 0.2592, "lr": 6.963290854838376e-06, "epoch": 0.4345435459953013, "percentage": 43.46, "elapsed_time": "8:41:42", "remaining_time": "11:18:52"}
196
+ {"current_steps": 1640, "total_steps": 3751, "loss": 0.2502, "lr": 6.920402357415582e-06, "epoch": 0.4372094573204259, "percentage": 43.72, "elapsed_time": "8:44:03", "remaining_time": "11:14:33"}
197
+ {"current_steps": 1650, "total_steps": 3751, "loss": 0.2615, "lr": 6.877347464604446e-06, "epoch": 0.43987536864555044, "percentage": 43.99, "elapsed_time": "8:46:23", "remaining_time": "11:10:16"}
198
+ {"current_steps": 1650, "total_steps": 3751, "eval_loss": 0.2549818754196167, "epoch": 0.43987536864555044, "percentage": 43.99, "elapsed_time": "8:50:47", "remaining_time": "11:15:51"}
199
+ {"current_steps": 1660, "total_steps": 3751, "loss": 0.2468, "lr": 6.834129906943822e-06, "epoch": 0.442541279970675, "percentage": 44.25, "elapsed_time": "8:53:04", "remaining_time": "11:11:29"}
200
+ {"current_steps": 1670, "total_steps": 3751, "loss": 0.2522, "lr": 6.790753429066838e-06, "epoch": 0.4452071912957995, "percentage": 44.52, "elapsed_time": "8:55:21", "remaining_time": "11:07:06"}
201
+ {"current_steps": 1680, "total_steps": 3751, "loss": 0.2468, "lr": 6.7472217893764465e-06, "epoch": 0.44787310262092406, "percentage": 44.79, "elapsed_time": "8:57:35", "remaining_time": "11:02:42"}
202
+ {"current_steps": 1690, "total_steps": 3751, "loss": 0.2525, "lr": 6.70353875971976e-06, "epoch": 0.4505390139460486, "percentage": 45.05, "elapsed_time": "8:59:56", "remaining_time": "10:58:28"}
203
+ {"current_steps": 1700, "total_steps": 3751, "loss": 0.2512, "lr": 6.659708125061242e-06, "epoch": 0.4532049252711732, "percentage": 45.32, "elapsed_time": "9:02:16", "remaining_time": "10:54:14"}
204
+ {"current_steps": 1700, "total_steps": 3751, "eval_loss": 0.24882382154464722, "epoch": 0.4532049252711732, "percentage": 45.32, "elapsed_time": "9:06:39", "remaining_time": "10:59:31"}
205
+ {"current_steps": 1710, "total_steps": 3751, "loss": 0.2522, "lr": 6.615733683154762e-06, "epoch": 0.45587083659629773, "percentage": 45.59, "elapsed_time": "9:09:01", "remaining_time": "10:55:17"}
206
+ {"current_steps": 1720, "total_steps": 3751, "loss": 0.2505, "lr": 6.571619244214521e-06, "epoch": 0.45853674792142224, "percentage": 45.85, "elapsed_time": "9:11:22", "remaining_time": "10:51:03"}
207
+ {"current_steps": 1730, "total_steps": 3751, "loss": 0.2596, "lr": 6.527368630584919e-06, "epoch": 0.4612026592465468, "percentage": 46.12, "elapsed_time": "9:13:40", "remaining_time": "10:46:48"}
208
+ {"current_steps": 1740, "total_steps": 3751, "loss": 0.2494, "lr": 6.482985676409368e-06, "epoch": 0.46386857057167136, "percentage": 46.39, "elapsed_time": "9:15:57", "remaining_time": "10:42:32"}
209
+ {"current_steps": 1750, "total_steps": 3751, "loss": 0.245, "lr": 6.438474227298065e-06, "epoch": 0.4665344818967959, "percentage": 46.65, "elapsed_time": "9:18:14", "remaining_time": "10:38:18"}
210
+ {"current_steps": 1750, "total_steps": 3751, "eval_loss": 0.25038692355155945, "epoch": 0.4665344818967959, "percentage": 46.65, "elapsed_time": "9:22:36", "remaining_time": "10:43:18"}
211
+ {"current_steps": 1760, "total_steps": 3751, "loss": 0.2595, "lr": 6.393838139994797e-06, "epoch": 0.4692003932219205, "percentage": 46.92, "elapsed_time": "9:24:58", "remaining_time": "10:39:07"}
212
+ {"current_steps": 1770, "total_steps": 3751, "loss": 0.2516, "lr": 6.349081282042768e-06, "epoch": 0.471866304547045, "percentage": 47.19, "elapsed_time": "9:27:16", "remaining_time": "10:34:53"}
213
+ {"current_steps": 1780, "total_steps": 3751, "loss": 0.2469, "lr": 6.304207531449486e-06, "epoch": 0.47453221587216954, "percentage": 47.45, "elapsed_time": "9:29:35", "remaining_time": "10:30:42"}
214
+ {"current_steps": 1790, "total_steps": 3751, "loss": 0.2354, "lr": 6.259220776350746e-06, "epoch": 0.4771981271972941, "percentage": 47.72, "elapsed_time": "9:31:59", "remaining_time": "10:26:37"}
215
+ {"current_steps": 1800, "total_steps": 3751, "loss": 0.2503, "lr": 6.2141249146737545e-06, "epoch": 0.47986403852241866, "percentage": 47.99, "elapsed_time": "9:34:19", "remaining_time": "10:22:30"}
216
+ {"current_steps": 1800, "total_steps": 3751, "eval_loss": 0.24808603525161743, "epoch": 0.47986403852241866, "percentage": 47.99, "elapsed_time": "9:38:42", "remaining_time": "10:27:15"}
217
+ {"current_steps": 1810, "total_steps": 3751, "loss": 0.2466, "lr": 6.168923853799369e-06, "epoch": 0.4825299498475432, "percentage": 48.25, "elapsed_time": "9:41:02", "remaining_time": "10:23:06"}
218
+ {"current_steps": 1820, "total_steps": 3751, "loss": 0.2467, "lr": 6.123621510223552e-06, "epoch": 0.4851958611726677, "percentage": 48.52, "elapsed_time": "9:43:21", "remaining_time": "10:18:56"}
219
+ {"current_steps": 1830, "total_steps": 3751, "loss": 0.2523, "lr": 6.0782218092180164e-06, "epoch": 0.4878617724977923, "percentage": 48.79, "elapsed_time": "9:45:40", "remaining_time": "10:14:47"}
220
+ {"current_steps": 1840, "total_steps": 3751, "loss": 0.2428, "lr": 6.032728684490118e-06, "epoch": 0.49052768382291684, "percentage": 49.05, "elapsed_time": "9:47:56", "remaining_time": "10:10:37"}
221
+ {"current_steps": 1850, "total_steps": 3751, "loss": 0.2402, "lr": 5.987146077842015e-06, "epoch": 0.4931935951480414, "percentage": 49.32, "elapsed_time": "9:50:14", "remaining_time": "10:06:31"}
222
+ {"current_steps": 1850, "total_steps": 3751, "eval_loss": 0.24496783316135406, "epoch": 0.4931935951480414, "percentage": 49.32, "elapsed_time": "9:54:37", "remaining_time": "10:11:01"}
223
+ {"current_steps": 1860, "total_steps": 3751, "loss": 0.2526, "lr": 5.941477938829126e-06, "epoch": 0.49585950647316596, "percentage": 49.59, "elapsed_time": "9:56:55", "remaining_time": "10:06:52"}
224
+ {"current_steps": 1870, "total_steps": 3751, "loss": 0.2462, "lr": 5.8957282244179125e-06, "epoch": 0.49852541779829046, "percentage": 49.85, "elapsed_time": "9:59:17", "remaining_time": "10:02:49"}
225
+ {"current_steps": 1880, "total_steps": 3751, "loss": 0.2343, "lr": 5.84990089864303e-06, "epoch": 0.501191329123415, "percentage": 50.12, "elapsed_time": "10:01:37", "remaining_time": "9:58:44"}
226
+ {"current_steps": 1890, "total_steps": 3751, "loss": 0.2403, "lr": 5.803999932263859e-06, "epoch": 0.5038572404485396, "percentage": 50.39, "elapsed_time": "10:03:57", "remaining_time": "9:54:41"}
227
+ {"current_steps": 1900, "total_steps": 3751, "loss": 0.2346, "lr": 5.7580293024204455e-06, "epoch": 0.5065231517736641, "percentage": 50.65, "elapsed_time": "10:06:13", "remaining_time": "9:50:35"}
228
+ {"current_steps": 1900, "total_steps": 3751, "eval_loss": 0.24397991597652435, "epoch": 0.5065231517736641, "percentage": 50.65, "elapsed_time": "10:10:36", "remaining_time": "9:54:51"}
229
+ {"current_steps": 1910, "total_steps": 3751, "loss": 0.251, "lr": 5.7119929922889065e-06, "epoch": 0.5091890630987886, "percentage": 50.92, "elapsed_time": "10:12:57", "remaining_time": "9:50:49"}
230
+ {"current_steps": 1920, "total_steps": 3751, "loss": 0.2443, "lr": 5.665894990736301e-06, "epoch": 0.5118549744239133, "percentage": 51.19, "elapsed_time": "10:15:19", "remaining_time": "9:46:47"}
231
+ {"current_steps": 1930, "total_steps": 3751, "loss": 0.2492, "lr": 5.6197392919750095e-06, "epoch": 0.5145208857490378, "percentage": 51.45, "elapsed_time": "10:17:37", "remaining_time": "9:42:44"}
232
+ {"current_steps": 1940, "total_steps": 3751, "loss": 0.2472, "lr": 5.573529895216648e-06, "epoch": 0.5171867970741624, "percentage": 51.72, "elapsed_time": "10:19:57", "remaining_time": "9:38:43"}
233
+ {"current_steps": 1950, "total_steps": 3751, "loss": 0.2413, "lr": 5.5272708043255605e-06, "epoch": 0.5198527083992869, "percentage": 51.99, "elapsed_time": "10:22:16", "remaining_time": "9:34:43"}
234
+ {"current_steps": 1950, "total_steps": 3751, "eval_loss": 0.24250540137290955, "epoch": 0.5198527083992869, "percentage": 51.99, "elapsed_time": "10:26:39", "remaining_time": "9:38:46"}
235
+ {"current_steps": 1960, "total_steps": 3751, "loss": 0.237, "lr": 5.480966027471889e-06, "epoch": 0.5225186197244114, "percentage": 52.25, "elapsed_time": "10:28:59", "remaining_time": "9:34:45"}
236
+ {"current_steps": 1970, "total_steps": 3751, "loss": 0.2449, "lr": 5.434619576784288e-06, "epoch": 0.525184531049536, "percentage": 52.52, "elapsed_time": "10:31:19", "remaining_time": "9:30:45"}
237
+ {"current_steps": 1980, "total_steps": 3751, "loss": 0.2237, "lr": 5.388235468002286e-06, "epoch": 0.5278504423746605, "percentage": 52.79, "elapsed_time": "10:33:38", "remaining_time": "9:26:45"}
238
+ {"current_steps": 1990, "total_steps": 3751, "loss": 0.2454, "lr": 5.341817720128344e-06, "epoch": 0.5305163536997851, "percentage": 53.05, "elapsed_time": "10:35:55", "remaining_time": "9:22:44"}
239
+ {"current_steps": 2000, "total_steps": 3751, "loss": 0.24, "lr": 5.295370355079615e-06, "epoch": 0.5331822650249096, "percentage": 53.32, "elapsed_time": "10:38:17", "remaining_time": "9:18:49"}
240
+ {"current_steps": 2000, "total_steps": 3751, "eval_loss": 0.2383483648300171, "epoch": 0.5331822650249096, "percentage": 53.32, "elapsed_time": "10:42:40", "remaining_time": "9:22:39"}