Training in progress, step 1000
Browse files
model-00001-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4976698672
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b0ad16903b58549a1cd7dfe71d3dea73dfc74f0ec78e354d89a6bd5051f7e066
|
| 3 |
size 4976698672
|
model-00002-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4999802720
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:80956613f670eb36f51608e5ef8a9c8654de0282b5f13bf55120388f2dc8ec37
|
| 3 |
size 4999802720
|
model-00003-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4915916176
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:91a6c0f12d2264897d50f324656cf73b8fb43f5ee36cb25dfac07110a973947e
|
| 3 |
size 4915916176
|
model-00004-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1168138808
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d15ffa35b6917dd5b65408c5625784a5a85a1a04d982fa4d6e699f783c23802c
|
| 3 |
size 1168138808
|
trainer_log.jsonl
CHANGED
|
@@ -58,3 +58,63 @@
|
|
| 58 |
{"current_steps": 490, "total_steps": 3751, "loss": 0.3109, "lr": 9.97187488356174e-06, "epoch": 0.13062965493110285, "percentage": 13.06, "elapsed_time": "2:32:51", "remaining_time": "16:57:15"}
|
| 59 |
{"current_steps": 500, "total_steps": 3751, "loss": 0.3123, "lr": 9.966729958067638e-06, "epoch": 0.1332955662562274, "percentage": 13.33, "elapsed_time": "2:35:08", "remaining_time": "16:48:43"}
|
| 60 |
{"current_steps": 500, "total_steps": 3751, "eval_loss": 0.3106406331062317, "epoch": 0.1332955662562274, "percentage": 13.33, "elapsed_time": "2:39:31", "remaining_time": "17:17:13"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
{"current_steps": 490, "total_steps": 3751, "loss": 0.3109, "lr": 9.97187488356174e-06, "epoch": 0.13062965493110285, "percentage": 13.06, "elapsed_time": "2:32:51", "remaining_time": "16:57:15"}
|
| 59 |
{"current_steps": 500, "total_steps": 3751, "loss": 0.3123, "lr": 9.966729958067638e-06, "epoch": 0.1332955662562274, "percentage": 13.33, "elapsed_time": "2:35:08", "remaining_time": "16:48:43"}
|
| 60 |
{"current_steps": 500, "total_steps": 3751, "eval_loss": 0.3106406331062317, "epoch": 0.1332955662562274, "percentage": 13.33, "elapsed_time": "2:39:31", "remaining_time": "17:17:13"}
|
| 61 |
+
{"current_steps": 510, "total_steps": 3751, "loss": 0.3032, "lr": 9.961154684757636e-06, "epoch": 0.13596147758135196, "percentage": 13.6, "elapsed_time": "2:42:53", "remaining_time": "17:15:08"}
|
| 62 |
+
{"current_steps": 520, "total_steps": 3751, "loss": 0.3048, "lr": 9.955149546707465e-06, "epoch": 0.1386273889064765, "percentage": 13.86, "elapsed_time": "2:45:10", "remaining_time": "17:06:19"}
|
| 63 |
+
{"current_steps": 530, "total_steps": 3751, "loss": 0.2947, "lr": 9.948715064238956e-06, "epoch": 0.14129330023160105, "percentage": 14.13, "elapsed_time": "2:47:33", "remaining_time": "16:58:20"}
|
| 64 |
+
{"current_steps": 540, "total_steps": 3751, "loss": 0.2998, "lr": 9.941851794874969e-06, "epoch": 0.14395921155672559, "percentage": 14.4, "elapsed_time": "2:49:49", "remaining_time": "16:49:51"}
|
| 65 |
+
{"current_steps": 550, "total_steps": 3751, "loss": 0.3138, "lr": 9.934560333291077e-06, "epoch": 0.14662512288185015, "percentage": 14.66, "elapsed_time": "2:52:08", "remaining_time": "16:41:50"}
|
| 66 |
+
{"current_steps": 550, "total_steps": 3751, "eval_loss": 0.3050285875797272, "epoch": 0.14662512288185015, "percentage": 14.66, "elapsed_time": "2:56:31", "remaining_time": "17:07:21"}
|
| 67 |
+
{"current_steps": 560, "total_steps": 3751, "loss": 0.3014, "lr": 9.926841311264037e-06, "epoch": 0.1492910342069747, "percentage": 14.93, "elapsed_time": "2:58:47", "remaining_time": "16:58:49"}
|
| 68 |
+
{"current_steps": 570, "total_steps": 3751, "loss": 0.3076, "lr": 9.918695397617064e-06, "epoch": 0.15195694553209924, "percentage": 15.2, "elapsed_time": "3:01:08", "remaining_time": "16:50:52"}
|
| 69 |
+
{"current_steps": 580, "total_steps": 3751, "loss": 0.3013, "lr": 9.91012329816186e-06, "epoch": 0.1546228568572238, "percentage": 15.46, "elapsed_time": "3:03:30", "remaining_time": "16:43:19"}
|
| 70 |
+
{"current_steps": 590, "total_steps": 3751, "loss": 0.3017, "lr": 9.901125755637473e-06, "epoch": 0.15728876818234833, "percentage": 15.73, "elapsed_time": "3:05:50", "remaining_time": "16:35:37"}
|
| 71 |
+
{"current_steps": 600, "total_steps": 3751, "loss": 0.3032, "lr": 9.89170354964594e-06, "epoch": 0.15995467950747289, "percentage": 16.0, "elapsed_time": "3:08:08", "remaining_time": "16:28:04"}
|
| 72 |
+
{"current_steps": 600, "total_steps": 3751, "eval_loss": 0.3046238422393799, "epoch": 0.15995467950747289, "percentage": 16.0, "elapsed_time": "3:12:31", "remaining_time": "16:51:06"}
|
| 73 |
+
{"current_steps": 610, "total_steps": 3751, "loss": 0.3027, "lr": 9.881857496584726e-06, "epoch": 0.16262059083259742, "percentage": 16.26, "elapsed_time": "3:14:53", "remaining_time": "16:43:29"}
|
| 74 |
+
{"current_steps": 620, "total_steps": 3751, "loss": 0.296, "lr": 9.871588449575999e-06, "epoch": 0.16528650215772198, "percentage": 16.53, "elapsed_time": "3:17:12", "remaining_time": "16:35:52"}
|
| 75 |
+
{"current_steps": 630, "total_steps": 3751, "loss": 0.3031, "lr": 9.860897298392712e-06, "epoch": 0.16795241348284654, "percentage": 16.8, "elapsed_time": "3:19:28", "remaining_time": "16:28:09"}
|
| 76 |
+
{"current_steps": 640, "total_steps": 3751, "loss": 0.3006, "lr": 9.849784969381488e-06, "epoch": 0.17061832480797107, "percentage": 17.06, "elapsed_time": "3:21:47", "remaining_time": "16:20:53"}
|
| 77 |
+
{"current_steps": 650, "total_steps": 3751, "loss": 0.2827, "lr": 9.83825242538238e-06, "epoch": 0.17328423613309563, "percentage": 17.33, "elapsed_time": "3:24:06", "remaining_time": "16:13:46"}
|
| 78 |
+
{"current_steps": 650, "total_steps": 3751, "eval_loss": 0.30168309807777405, "epoch": 0.17328423613309563, "percentage": 17.33, "elapsed_time": "3:28:29", "remaining_time": "16:34:41"}
|
| 79 |
+
{"current_steps": 660, "total_steps": 3751, "loss": 0.2814, "lr": 9.826300665645432e-06, "epoch": 0.17595014745822016, "percentage": 17.6, "elapsed_time": "3:30:46", "remaining_time": "16:27:07"}
|
| 80 |
+
{"current_steps": 670, "total_steps": 3751, "loss": 0.2852, "lr": 9.813930725744095e-06, "epoch": 0.17861605878334472, "percentage": 17.86, "elapsed_time": "3:33:09", "remaining_time": "16:20:11"}
|
| 81 |
+
{"current_steps": 680, "total_steps": 3751, "loss": 0.3016, "lr": 9.801143677485509e-06, "epoch": 0.18128197010846928, "percentage": 18.13, "elapsed_time": "3:35:31", "remaining_time": "16:13:19"}
|
| 82 |
+
{"current_steps": 690, "total_steps": 3751, "loss": 0.3023, "lr": 9.787940628817627e-06, "epoch": 0.1839478814335938, "percentage": 18.4, "elapsed_time": "3:37:50", "remaining_time": "16:06:23"}
|
| 83 |
+
{"current_steps": 700, "total_steps": 3751, "loss": 0.2953, "lr": 9.774322723733216e-06, "epoch": 0.18661379275871837, "percentage": 18.66, "elapsed_time": "3:40:06", "remaining_time": "15:59:22"}
|
| 84 |
+
{"current_steps": 700, "total_steps": 3751, "eval_loss": 0.2970203757286072, "epoch": 0.18661379275871837, "percentage": 18.66, "elapsed_time": "3:44:29", "remaining_time": "16:18:29"}
|
| 85 |
+
{"current_steps": 710, "total_steps": 3751, "loss": 0.2819, "lr": 9.760291142170739e-06, "epoch": 0.1892797040838429, "percentage": 18.93, "elapsed_time": "3:46:49", "remaining_time": "16:11:32"}
|
| 86 |
+
{"current_steps": 720, "total_steps": 3751, "loss": 0.2922, "lr": 9.745847099912116e-06, "epoch": 0.19194561540896746, "percentage": 19.19, "elapsed_time": "3:49:09", "remaining_time": "16:04:41"}
|
| 87 |
+
{"current_steps": 730, "total_steps": 3751, "loss": 0.2936, "lr": 9.73099184847738e-06, "epoch": 0.19461152673409202, "percentage": 19.46, "elapsed_time": "3:51:24", "remaining_time": "15:57:37"}
|
| 88 |
+
{"current_steps": 740, "total_steps": 3751, "loss": 0.2852, "lr": 9.715726675016238e-06, "epoch": 0.19727743805921655, "percentage": 19.73, "elapsed_time": "3:53:43", "remaining_time": "15:51:02"}
|
| 89 |
+
{"current_steps": 750, "total_steps": 3751, "loss": 0.2854, "lr": 9.700052902196541e-06, "epoch": 0.1999433493843411, "percentage": 19.99, "elapsed_time": "3:56:05", "remaining_time": "15:44:41"}
|
| 90 |
+
{"current_steps": 750, "total_steps": 3751, "eval_loss": 0.292442262172699, "epoch": 0.1999433493843411, "percentage": 19.99, "elapsed_time": "4:00:28", "remaining_time": "16:02:13"}
|
| 91 |
+
{"current_steps": 760, "total_steps": 3751, "loss": 0.2911, "lr": 9.68397188808969e-06, "epoch": 0.20260926070946564, "percentage": 20.26, "elapsed_time": "4:02:44", "remaining_time": "15:55:18"}
|
| 92 |
+
{"current_steps": 770, "total_steps": 3751, "loss": 0.3003, "lr": 9.667485026052956e-06, "epoch": 0.2052751720345902, "percentage": 20.53, "elapsed_time": "4:04:59", "remaining_time": "15:48:29"}
|
| 93 |
+
{"current_steps": 780, "total_steps": 3751, "loss": 0.2954, "lr": 9.650593744608754e-06, "epoch": 0.20794108335971476, "percentage": 20.79, "elapsed_time": "4:07:26", "remaining_time": "15:42:29"}
|
| 94 |
+
{"current_steps": 790, "total_steps": 3751, "loss": 0.2921, "lr": 9.633299507320862e-06, "epoch": 0.2106069946848393, "percentage": 21.06, "elapsed_time": "4:09:50", "remaining_time": "15:36:25"}
|
| 95 |
+
{"current_steps": 800, "total_steps": 3751, "loss": 0.2872, "lr": 9.615603812667618e-06, "epoch": 0.21327290600996385, "percentage": 21.33, "elapsed_time": "4:12:06", "remaining_time": "15:29:57"}
|
| 96 |
+
{"current_steps": 800, "total_steps": 3751, "eval_loss": 0.2895732522010803, "epoch": 0.21327290600996385, "percentage": 21.33, "elapsed_time": "4:16:29", "remaining_time": "15:46:08"}
|
| 97 |
+
{"current_steps": 810, "total_steps": 3751, "loss": 0.2907, "lr": 9.597508193912077e-06, "epoch": 0.21593881733508838, "percentage": 21.59, "elapsed_time": "4:18:46", "remaining_time": "15:39:34"}
|
| 98 |
+
{"current_steps": 820, "total_steps": 3751, "loss": 0.2867, "lr": 9.579014218969158e-06, "epoch": 0.21860472866021294, "percentage": 21.86, "elapsed_time": "4:21:10", "remaining_time": "15:33:31"}
|
| 99 |
+
{"current_steps": 830, "total_steps": 3751, "loss": 0.2942, "lr": 9.560123490269795e-06, "epoch": 0.2212706399853375, "percentage": 22.13, "elapsed_time": "4:23:30", "remaining_time": "15:27:22"}
|
| 100 |
+
{"current_steps": 840, "total_steps": 3751, "loss": 0.2832, "lr": 9.540837644622091e-06, "epoch": 0.22393655131046203, "percentage": 22.39, "elapsed_time": "4:25:51", "remaining_time": "15:21:19"}
|
| 101 |
+
{"current_steps": 850, "total_steps": 3751, "loss": 0.2866, "lr": 9.521158353069494e-06, "epoch": 0.2266024626355866, "percentage": 22.66, "elapsed_time": "4:28:09", "remaining_time": "15:15:13"}
|
| 102 |
+
{"current_steps": 850, "total_steps": 3751, "eval_loss": 0.28362876176834106, "epoch": 0.2266024626355866, "percentage": 22.66, "elapsed_time": "4:32:32", "remaining_time": "15:30:11"}
|
| 103 |
+
{"current_steps": 860, "total_steps": 3751, "loss": 0.2877, "lr": 9.501087320746007e-06, "epoch": 0.22926837396071112, "percentage": 22.93, "elapsed_time": "4:34:52", "remaining_time": "15:24:01"}
|
| 104 |
+
{"current_steps": 870, "total_steps": 3751, "loss": 0.2857, "lr": 9.480626286728445e-06, "epoch": 0.23193428528583568, "percentage": 23.19, "elapsed_time": "4:37:10", "remaining_time": "15:17:52"}
|
| 105 |
+
{"current_steps": 880, "total_steps": 3751, "loss": 0.2839, "lr": 9.459777023885754e-06, "epoch": 0.23460019661096024, "percentage": 23.46, "elapsed_time": "4:39:25", "remaining_time": "15:11:35"}
|
| 106 |
+
{"current_steps": 890, "total_steps": 3751, "loss": 0.2833, "lr": 9.438541338725397e-06, "epoch": 0.23726610793608477, "percentage": 23.73, "elapsed_time": "4:41:45", "remaining_time": "15:05:43"}
|
| 107 |
+
{"current_steps": 900, "total_steps": 3751, "loss": 0.2925, "lr": 9.416921071236821e-06, "epoch": 0.23993201926120933, "percentage": 23.99, "elapsed_time": "4:44:05", "remaining_time": "14:59:54"}
|
| 108 |
+
{"current_steps": 900, "total_steps": 3751, "eval_loss": 0.2794356942176819, "epoch": 0.23993201926120933, "percentage": 23.99, "elapsed_time": "4:48:28", "remaining_time": "15:13:48"}
|
| 109 |
+
{"current_steps": 910, "total_steps": 3751, "loss": 0.2846, "lr": 9.394918094732044e-06, "epoch": 0.24259793058633386, "percentage": 24.26, "elapsed_time": "4:50:53", "remaining_time": "15:08:10"}
|
| 110 |
+
{"current_steps": 920, "total_steps": 3751, "loss": 0.2826, "lr": 9.37253431568332e-06, "epoch": 0.24526384191145842, "percentage": 24.53, "elapsed_time": "4:53:11", "remaining_time": "15:02:12"}
|
| 111 |
+
{"current_steps": 930, "total_steps": 3751, "loss": 0.2856, "lr": 9.349771673557966e-06, "epoch": 0.24792975323658298, "percentage": 24.79, "elapsed_time": "4:55:31", "remaining_time": "14:56:24"}
|
| 112 |
+
{"current_steps": 940, "total_steps": 3751, "loss": 0.2886, "lr": 9.326632140650311e-06, "epoch": 0.2505956645617075, "percentage": 25.06, "elapsed_time": "4:57:50", "remaining_time": "14:50:39"}
|
| 113 |
+
{"current_steps": 950, "total_steps": 3751, "loss": 0.2843, "lr": 9.303117721910801e-06, "epoch": 0.25326157588683207, "percentage": 25.33, "elapsed_time": "5:00:09", "remaining_time": "14:44:59"}
|
| 114 |
+
{"current_steps": 950, "total_steps": 3751, "eval_loss": 0.28232333064079285, "epoch": 0.25326157588683207, "percentage": 25.33, "elapsed_time": "5:04:32", "remaining_time": "14:57:55"}
|
| 115 |
+
{"current_steps": 960, "total_steps": 3751, "loss": 0.2758, "lr": 9.279230454772282e-06, "epoch": 0.25592748721195663, "percentage": 25.59, "elapsed_time": "5:06:53", "remaining_time": "14:52:14"}
|
| 116 |
+
{"current_steps": 970, "total_steps": 3751, "loss": 0.2789, "lr": 9.25497240897346e-06, "epoch": 0.2585933985370812, "percentage": 25.86, "elapsed_time": "5:09:12", "remaining_time": "14:46:30"}
|
| 117 |
+
{"current_steps": 980, "total_steps": 3751, "loss": 0.2854, "lr": 9.23034568637957e-06, "epoch": 0.2612593098622057, "percentage": 26.13, "elapsed_time": "5:11:32", "remaining_time": "14:40:52"}
|
| 118 |
+
{"current_steps": 990, "total_steps": 3751, "loss": 0.2857, "lr": 9.205352420800253e-06, "epoch": 0.26392522118733025, "percentage": 26.39, "elapsed_time": "5:13:49", "remaining_time": "14:35:13"}
|
| 119 |
+
{"current_steps": 1000, "total_steps": 3751, "loss": 0.292, "lr": 9.179994777804677e-06, "epoch": 0.2665911325124548, "percentage": 26.66, "elapsed_time": "5:16:08", "remaining_time": "14:29:41"}
|
| 120 |
+
{"current_steps": 1000, "total_steps": 3751, "eval_loss": 0.27887627482414246, "epoch": 0.2665911325124548, "percentage": 26.66, "elapsed_time": "5:20:31", "remaining_time": "14:41:44"}
|