izzcw commited on
Commit
a0f9a4b
·
verified ·
1 Parent(s): 8fc6861

Training in progress, step 2500

Browse files
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8f6a18b3b5403b88ecb1b33c9566847f51cf7d6c6ba999962e7278ea5bc012f2
3
  size 4976698672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3abec6bf85cbde7e5f508abd990fcf07aecca53f875ad04adf341629e4e39a86
3
  size 4976698672
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9aaaacbfd6a47040d3b27a90d1779024173c4c14393f22f74a86b31139c5fbc3
3
  size 4999802720
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b57cb7bf320de809db1bdf49f99bc20b59f95aff72c45244053b1ffe3d73a724
3
  size 4999802720
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b525d664730e8b50d6275487d6259a5afc14e3aeb2c28ddbabbbf181356776fd
3
  size 4915916176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d903c0b14bd0e8ad7c644ee3c54828eab12b6406c0874c4ea5d1e476f3c865fc
3
  size 4915916176
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:48e867be300fa26501aba8a4d1bcdd1fd9ba2c9963f931d58c567b2d58d5efd7
3
  size 1168138808
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:36fc3d6354d81339dd8e005a1ea0f6dcc337e08434f12e548dadded8d9d74742
3
  size 1168138808
trainer_log.jsonl CHANGED
@@ -238,3 +238,63 @@
238
  {"current_steps": 1990, "total_steps": 3751, "loss": 0.2454, "lr": 5.341817720128344e-06, "epoch": 0.5305163536997851, "percentage": 53.05, "elapsed_time": "10:35:55", "remaining_time": "9:22:44"}
239
  {"current_steps": 2000, "total_steps": 3751, "loss": 0.24, "lr": 5.295370355079615e-06, "epoch": 0.5331822650249096, "percentage": 53.32, "elapsed_time": "10:38:17", "remaining_time": "9:18:49"}
240
  {"current_steps": 2000, "total_steps": 3751, "eval_loss": 0.2383483648300171, "epoch": 0.5331822650249096, "percentage": 53.32, "elapsed_time": "10:42:40", "remaining_time": "9:22:39"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
238
  {"current_steps": 1990, "total_steps": 3751, "loss": 0.2454, "lr": 5.341817720128344e-06, "epoch": 0.5305163536997851, "percentage": 53.05, "elapsed_time": "10:35:55", "remaining_time": "9:22:44"}
239
  {"current_steps": 2000, "total_steps": 3751, "loss": 0.24, "lr": 5.295370355079615e-06, "epoch": 0.5331822650249096, "percentage": 53.32, "elapsed_time": "10:38:17", "remaining_time": "9:18:49"}
240
  {"current_steps": 2000, "total_steps": 3751, "eval_loss": 0.2383483648300171, "epoch": 0.5331822650249096, "percentage": 53.32, "elapsed_time": "10:42:40", "remaining_time": "9:22:39"}
241
+ {"current_steps": 2010, "total_steps": 3751, "loss": 0.2373, "lr": 5.248897397339462e-06, "epoch": 0.5358481763500341, "percentage": 53.59, "elapsed_time": "10:46:20", "remaining_time": "9:19:50"}
242
+ {"current_steps": 2020, "total_steps": 3751, "loss": 0.2355, "lr": 5.202402873608763e-06, "epoch": 0.5385140876751587, "percentage": 53.85, "elapsed_time": "10:48:42", "remaining_time": "9:15:53"}
243
+ {"current_steps": 2030, "total_steps": 3751, "loss": 0.2415, "lr": 5.155890812457e-06, "epoch": 0.5411799990002832, "percentage": 54.12, "elapsed_time": "10:51:01", "remaining_time": "9:11:55"}
244
+ {"current_steps": 2040, "total_steps": 3751, "loss": 0.2319, "lr": 5.1093652439732036e-06, "epoch": 0.5438459103254079, "percentage": 54.39, "elapsed_time": "10:53:19", "remaining_time": "9:07:57"}
245
+ {"current_steps": 2050, "total_steps": 3751, "loss": 0.2398, "lr": 5.062830199416764e-06, "epoch": 0.5465118216505324, "percentage": 54.65, "elapsed_time": "10:55:39", "remaining_time": "9:04:02"}
246
+ {"current_steps": 2050, "total_steps": 3751, "eval_loss": 0.24081052839756012, "epoch": 0.5465118216505324, "percentage": 54.65, "elapsed_time": "11:00:02", "remaining_time": "9:07:40"}
247
+ {"current_steps": 2060, "total_steps": 3751, "loss": 0.2393, "lr": 5.0162897108681375e-06, "epoch": 0.5491777329756569, "percentage": 54.92, "elapsed_time": "11:02:20", "remaining_time": "9:03:41"}
248
+ {"current_steps": 2070, "total_steps": 3751, "loss": 0.2447, "lr": 4.969747810879478e-06, "epoch": 0.5518436443007815, "percentage": 55.19, "elapsed_time": "11:04:36", "remaining_time": "8:59:42"}
249
+ {"current_steps": 2080, "total_steps": 3751, "loss": 0.2342, "lr": 4.923208532125235e-06, "epoch": 0.554509555625906, "percentage": 55.45, "elapsed_time": "11:06:54", "remaining_time": "8:55:46"}
250
+ {"current_steps": 2090, "total_steps": 3751, "loss": 0.2355, "lr": 4.876675907052745e-06, "epoch": 0.5571754669510306, "percentage": 55.72, "elapsed_time": "11:09:13", "remaining_time": "8:51:51"}
251
+ {"current_steps": 2100, "total_steps": 3751, "loss": 0.2473, "lr": 4.8301539675328205e-06, "epoch": 0.5598413782761551, "percentage": 55.99, "elapsed_time": "11:11:30", "remaining_time": "8:47:55"}
252
+ {"current_steps": 2100, "total_steps": 3751, "eval_loss": 0.23838448524475098, "epoch": 0.5598413782761551, "percentage": 55.99, "elapsed_time": "11:15:52", "remaining_time": "8:51:22"}
253
+ {"current_steps": 2110, "total_steps": 3751, "loss": 0.2202, "lr": 4.783646744510416e-06, "epoch": 0.5625072896012796, "percentage": 56.25, "elapsed_time": "11:18:09", "remaining_time": "8:47:25"}
254
+ {"current_steps": 2120, "total_steps": 3751, "loss": 0.2367, "lr": 4.737158267655358e-06, "epoch": 0.5651732009264042, "percentage": 56.52, "elapsed_time": "11:20:28", "remaining_time": "8:43:30"}
255
+ {"current_steps": 2130, "total_steps": 3751, "loss": 0.2355, "lr": 4.690692565013193e-06, "epoch": 0.5678391122515287, "percentage": 56.78, "elapsed_time": "11:22:45", "remaining_time": "8:39:35"}
256
+ {"current_steps": 2140, "total_steps": 3751, "loss": 0.2306, "lr": 4.644253662656167e-06, "epoch": 0.5705050235766533, "percentage": 57.05, "elapsed_time": "11:25:03", "remaining_time": "8:35:43"}
257
+ {"current_steps": 2150, "total_steps": 3751, "loss": 0.2423, "lr": 4.597845584334387e-06, "epoch": 0.5731709349017778, "percentage": 57.32, "elapsed_time": "11:27:23", "remaining_time": "8:31:52"}
258
+ {"current_steps": 2150, "total_steps": 3751, "eval_loss": 0.234757199883461, "epoch": 0.5731709349017778, "percentage": 57.32, "elapsed_time": "11:31:46", "remaining_time": "8:35:08"}
259
+ {"current_steps": 2160, "total_steps": 3751, "loss": 0.2273, "lr": 4.55147235112718e-06, "epoch": 0.5758368462269023, "percentage": 57.58, "elapsed_time": "11:34:03", "remaining_time": "8:31:13"}
260
+ {"current_steps": 2170, "total_steps": 3751, "loss": 0.2343, "lr": 4.505137981094675e-06, "epoch": 0.578502757552027, "percentage": 57.85, "elapsed_time": "11:36:23", "remaining_time": "8:27:22"}
261
+ {"current_steps": 2180, "total_steps": 3751, "loss": 0.2324, "lr": 4.458846488929663e-06, "epoch": 0.5811686688771515, "percentage": 58.12, "elapsed_time": "11:38:41", "remaining_time": "8:23:30"}
262
+ {"current_steps": 2190, "total_steps": 3751, "loss": 0.2345, "lr": 4.41260188560973e-06, "epoch": 0.5838345802022761, "percentage": 58.38, "elapsed_time": "11:40:58", "remaining_time": "8:19:38"}
263
+ {"current_steps": 2200, "total_steps": 3751, "loss": 0.2294, "lr": 4.366408178049728e-06, "epoch": 0.5865004915274006, "percentage": 58.65, "elapsed_time": "11:43:14", "remaining_time": "8:15:47"}
264
+ {"current_steps": 2200, "total_steps": 3751, "eval_loss": 0.23114009201526642, "epoch": 0.5865004915274006, "percentage": 58.65, "elapsed_time": "11:47:37", "remaining_time": "8:18:52"}
265
+ {"current_steps": 2210, "total_steps": 3751, "loss": 0.2276, "lr": 4.32026936875459e-06, "epoch": 0.5891664028525251, "percentage": 58.92, "elapsed_time": "11:49:56", "remaining_time": "8:15:01"}
266
+ {"current_steps": 2220, "total_steps": 3751, "loss": 0.2273, "lr": 4.274189455472529e-06, "epoch": 0.5918323141776497, "percentage": 59.18, "elapsed_time": "11:52:13", "remaining_time": "8:11:10"}
267
+ {"current_steps": 2230, "total_steps": 3751, "loss": 0.2288, "lr": 4.228172430848645e-06, "epoch": 0.5944982255027742, "percentage": 59.45, "elapsed_time": "11:54:29", "remaining_time": "8:07:19"}
268
+ {"current_steps": 2240, "total_steps": 3751, "loss": 0.2357, "lr": 4.182222282078983e-06, "epoch": 0.5971641368278988, "percentage": 59.72, "elapsed_time": "11:56:49", "remaining_time": "8:03:32"}
269
+ {"current_steps": 2250, "total_steps": 3751, "loss": 0.2403, "lr": 4.136342990565055e-06, "epoch": 0.5998300481530233, "percentage": 59.98, "elapsed_time": "11:59:07", "remaining_time": "7:59:44"}
270
+ {"current_steps": 2250, "total_steps": 3751, "eval_loss": 0.23227499425411224, "epoch": 0.5998300481530233, "percentage": 59.98, "elapsed_time": "12:03:30", "remaining_time": "8:02:39"}
271
+ {"current_steps": 2260, "total_steps": 3751, "loss": 0.2296, "lr": 4.090538531568867e-06, "epoch": 0.6024959594781478, "percentage": 60.25, "elapsed_time": "12:05:47", "remaining_time": "7:58:50"}
272
+ {"current_steps": 2270, "total_steps": 3751, "loss": 0.2328, "lr": 4.0448128738684775e-06, "epoch": 0.6051618708032724, "percentage": 60.52, "elapsed_time": "12:08:07", "remaining_time": "7:55:02"}
273
+ {"current_steps": 2280, "total_steps": 3751, "loss": 0.2325, "lr": 3.999169979414123e-06, "epoch": 0.6078277821283969, "percentage": 60.78, "elapsed_time": "12:10:25", "remaining_time": "7:51:15"}
274
+ {"current_steps": 2290, "total_steps": 3751, "loss": 0.2168, "lr": 3.9536138029849244e-06, "epoch": 0.6104936934535216, "percentage": 61.05, "elapsed_time": "12:12:45", "remaining_time": "7:47:29"}
275
+ {"current_steps": 2300, "total_steps": 3751, "loss": 0.2319, "lr": 3.908148291846225e-06, "epoch": 0.6131596047786461, "percentage": 61.32, "elapsed_time": "12:15:03", "remaining_time": "7:43:43"}
276
+ {"current_steps": 2300, "total_steps": 3751, "eval_loss": 0.22970426082611084, "epoch": 0.6131596047786461, "percentage": 61.32, "elapsed_time": "12:19:26", "remaining_time": "7:46:29"}
277
+ {"current_steps": 2310, "total_steps": 3751, "loss": 0.2341, "lr": 3.862777385407569e-06, "epoch": 0.6158255161037706, "percentage": 61.58, "elapsed_time": "12:21:41", "remaining_time": "7:42:40"}
278
+ {"current_steps": 2320, "total_steps": 3751, "loss": 0.2233, "lr": 3.817505014881378e-06, "epoch": 0.6184914274288952, "percentage": 61.85, "elapsed_time": "12:24:04", "remaining_time": "7:38:57"}
279
+ {"current_steps": 2330, "total_steps": 3751, "loss": 0.2267, "lr": 3.7723351029423143e-06, "epoch": 0.6211573387540197, "percentage": 62.12, "elapsed_time": "12:26:21", "remaining_time": "7:35:10"}
280
+ {"current_steps": 2340, "total_steps": 3751, "loss": 0.2217, "lr": 3.72727156338741e-06, "epoch": 0.6238232500791442, "percentage": 62.38, "elapsed_time": "12:28:40", "remaining_time": "7:31:26"}
281
+ {"current_steps": 2350, "total_steps": 3751, "loss": 0.222, "lr": 3.6823183007969375e-06, "epoch": 0.6264891614042688, "percentage": 62.65, "elapsed_time": "12:30:56", "remaining_time": "7:27:41"}
282
+ {"current_steps": 2350, "total_steps": 3751, "eval_loss": 0.22879666090011597, "epoch": 0.6264891614042688, "percentage": 62.65, "elapsed_time": "12:35:19", "remaining_time": "7:30:17"}
283
+ {"current_steps": 2360, "total_steps": 3751, "loss": 0.224, "lr": 3.637479210196102e-06, "epoch": 0.6291550727293933, "percentage": 62.92, "elapsed_time": "12:37:40", "remaining_time": "7:26:34"}
284
+ {"current_steps": 2370, "total_steps": 3751, "loss": 0.2324, "lr": 3.59275817671755e-06, "epoch": 0.6318209840545179, "percentage": 63.18, "elapsed_time": "12:39:57", "remaining_time": "7:22:49"}
285
+ {"current_steps": 2380, "total_steps": 3751, "loss": 0.2402, "lr": 3.548159075264738e-06, "epoch": 0.6344868953796424, "percentage": 63.45, "elapsed_time": "12:42:18", "remaining_time": "7:19:07"}
286
+ {"current_steps": 2390, "total_steps": 3751, "loss": 0.2296, "lr": 3.5036857701761857e-06, "epoch": 0.6371528067047669, "percentage": 63.72, "elapsed_time": "12:44:40", "remaining_time": "7:15:26"}
287
+ {"current_steps": 2400, "total_steps": 3751, "loss": 0.2193, "lr": 3.4593421148906523e-06, "epoch": 0.6398187180298915, "percentage": 63.98, "elapsed_time": "12:47:00", "remaining_time": "7:11:45"}
288
+ {"current_steps": 2400, "total_steps": 3751, "eval_loss": 0.2302982062101364, "epoch": 0.6398187180298915, "percentage": 63.98, "elapsed_time": "12:51:23", "remaining_time": "7:14:13"}
289
+ {"current_steps": 2410, "total_steps": 3751, "loss": 0.2241, "lr": 3.4151319516132414e-06, "epoch": 0.642484629355016, "percentage": 64.25, "elapsed_time": "12:53:45", "remaining_time": "7:10:32"}
290
+ {"current_steps": 2420, "total_steps": 3751, "loss": 0.228, "lr": 3.3710591109824954e-06, "epoch": 0.6451505406801407, "percentage": 64.52, "elapsed_time": "12:56:06", "remaining_time": "7:06:51"}
291
+ {"current_steps": 2430, "total_steps": 3751, "loss": 0.2248, "lr": 3.3271274117384834e-06, "epoch": 0.6478164520052652, "percentage": 64.78, "elapsed_time": "12:58:26", "remaining_time": "7:03:10"}
292
+ {"current_steps": 2440, "total_steps": 3751, "loss": 0.2255, "lr": 3.2833406603919243e-06, "epoch": 0.6504823633303897, "percentage": 65.05, "elapsed_time": "13:00:42", "remaining_time": "6:59:28"}
293
+ {"current_steps": 2450, "total_steps": 3751, "loss": 0.2252, "lr": 3.239702650894364e-06, "epoch": 0.6531482746555143, "percentage": 65.32, "elapsed_time": "13:03:00", "remaining_time": "6:55:47"}
294
+ {"current_steps": 2450, "total_steps": 3751, "eval_loss": 0.22466669976711273, "epoch": 0.6531482746555143, "percentage": 65.32, "elapsed_time": "13:07:23", "remaining_time": "6:58:07"}
295
+ {"current_steps": 2460, "total_steps": 3751, "loss": 0.2262, "lr": 3.1962171643094474e-06, "epoch": 0.6558141859806388, "percentage": 65.58, "elapsed_time": "13:09:41", "remaining_time": "6:54:25"}
296
+ {"current_steps": 2470, "total_steps": 3751, "loss": 0.2268, "lr": 3.152887968485303e-06, "epoch": 0.6584800973057634, "percentage": 65.85, "elapsed_time": "13:12:00", "remaining_time": "6:50:45"}
297
+ {"current_steps": 2480, "total_steps": 3751, "loss": 0.2307, "lr": 3.1097188177280735e-06, "epoch": 0.6611460086308879, "percentage": 66.12, "elapsed_time": "13:14:18", "remaining_time": "6:47:04"}
298
+ {"current_steps": 2490, "total_steps": 3751, "loss": 0.2306, "lr": 3.0667134524766173e-06, "epoch": 0.6638119199560124, "percentage": 66.38, "elapsed_time": "13:16:39", "remaining_time": "6:43:26"}
299
+ {"current_steps": 2500, "total_steps": 3751, "loss": 0.2304, "lr": 3.023875598978419e-06, "epoch": 0.666477831281137, "percentage": 66.65, "elapsed_time": "13:18:59", "remaining_time": "6:39:48"}
300
+ {"current_steps": 2500, "total_steps": 3751, "eval_loss": 0.22111880779266357, "epoch": 0.666477831281137, "percentage": 66.65, "elapsed_time": "13:23:22", "remaining_time": "6:42:00"}