izzcw commited on
Commit
b898c21
·
verified ·
1 Parent(s): 382e9eb

Training in progress, step 1500

Browse files
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b0ad16903b58549a1cd7dfe71d3dea73dfc74f0ec78e354d89a6bd5051f7e066
3
  size 4976698672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba120eb3a6be8279cf4ceae51e196e87f5137c5b61203f90835697cf0d9e9940
3
  size 4976698672
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:80956613f670eb36f51608e5ef8a9c8654de0282b5f13bf55120388f2dc8ec37
3
  size 4999802720
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:358b31816e477d0e0ec409a53e8807a908db72615bfc8df56d3aa0987fcf8cba
3
  size 4999802720
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:91a6c0f12d2264897d50f324656cf73b8fb43f5ee36cb25dfac07110a973947e
3
  size 4915916176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2eefe1427d92bd58ea6d35c93566e4144bd660de3458fa497b58f0d505ff6ba4
3
  size 4915916176
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d15ffa35b6917dd5b65408c5625784a5a85a1a04d982fa4d6e699f783c23802c
3
  size 1168138808
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d889cead7b26dcbcb7e7b1c77e716c943ccc2114e3da06eebd22b2c37fc61110
3
  size 1168138808
trainer_log.jsonl CHANGED
@@ -118,3 +118,65 @@
118
  {"current_steps": 990, "total_steps": 3751, "loss": 0.2857, "lr": 9.205352420800253e-06, "epoch": 0.26392522118733025, "percentage": 26.39, "elapsed_time": "5:13:49", "remaining_time": "14:35:13"}
119
  {"current_steps": 1000, "total_steps": 3751, "loss": 0.292, "lr": 9.179994777804677e-06, "epoch": 0.2665911325124548, "percentage": 26.66, "elapsed_time": "5:16:08", "remaining_time": "14:29:41"}
120
  {"current_steps": 1000, "total_steps": 3751, "eval_loss": 0.27887627482414246, "epoch": 0.2665911325124548, "percentage": 26.66, "elapsed_time": "5:20:31", "remaining_time": "14:41:44"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
  {"current_steps": 990, "total_steps": 3751, "loss": 0.2857, "lr": 9.205352420800253e-06, "epoch": 0.26392522118733025, "percentage": 26.39, "elapsed_time": "5:13:49", "remaining_time": "14:35:13"}
119
  {"current_steps": 1000, "total_steps": 3751, "loss": 0.292, "lr": 9.179994777804677e-06, "epoch": 0.2665911325124548, "percentage": 26.66, "elapsed_time": "5:16:08", "remaining_time": "14:29:41"}
120
  {"current_steps": 1000, "total_steps": 3751, "eval_loss": 0.27887627482414246, "epoch": 0.2665911325124548, "percentage": 26.66, "elapsed_time": "5:20:31", "remaining_time": "14:41:44"}
121
+ {"current_steps": 1010, "total_steps": 3751, "loss": 0.2863, "lr": 9.154274954533895e-06, "epoch": 0.26925704383757937, "percentage": 26.93, "elapsed_time": "5:24:11", "remaining_time": "14:39:49"}
122
+ {"current_steps": 1020, "total_steps": 3751, "loss": 0.279, "lr": 9.128195179510466e-06, "epoch": 0.2719229551627039, "percentage": 27.19, "elapsed_time": "5:26:32", "remaining_time": "14:34:18"}
123
+ {"current_steps": 1030, "total_steps": 3751, "loss": 0.2743, "lr": 9.101757712445369e-06, "epoch": 0.27458886648782843, "percentage": 27.46, "elapsed_time": "5:28:53", "remaining_time": "14:28:50"}
124
+ {"current_steps": 1040, "total_steps": 3751, "loss": 0.2757, "lr": 9.07496484404221e-06, "epoch": 0.277254777812953, "percentage": 27.73, "elapsed_time": "5:31:12", "remaining_time": "14:23:23"}
125
+ {"current_steps": 1050, "total_steps": 3751, "loss": 0.2775, "lr": 9.04781889579873e-06, "epoch": 0.27992068913807755, "percentage": 27.99, "elapsed_time": "5:33:30", "remaining_time": "14:17:55"}
126
+ {"current_steps": 1050, "total_steps": 3751, "eval_loss": 0.27627232670783997, "epoch": 0.27992068913807755, "percentage": 27.99, "elapsed_time": "5:37:53", "remaining_time": "14:29:11"}
127
+ {"current_steps": 1060, "total_steps": 3751, "loss": 0.2803, "lr": 9.020322219805674e-06, "epoch": 0.2825866004632021, "percentage": 28.26, "elapsed_time": "5:40:16", "remaining_time": "14:23:50"}
128
+ {"current_steps": 1070, "total_steps": 3751, "loss": 0.2831, "lr": 8.99247719854297e-06, "epoch": 0.28525251178832667, "percentage": 28.53, "elapsed_time": "5:42:37", "remaining_time": "14:18:29"}
129
+ {"current_steps": 1080, "total_steps": 3751, "loss": 0.2794, "lr": 8.964286244673315e-06, "epoch": 0.28791842311345117, "percentage": 28.79, "elapsed_time": "5:44:54", "remaining_time": "14:13:00"}
130
+ {"current_steps": 1090, "total_steps": 3751, "loss": 0.2728, "lr": 8.935751800833117e-06, "epoch": 0.29058433443857573, "percentage": 29.06, "elapsed_time": "5:47:12", "remaining_time": "14:07:38"}
131
+ {"current_steps": 1100, "total_steps": 3751, "loss": 0.2652, "lr": 8.906876339420851e-06, "epoch": 0.2932502457637003, "percentage": 29.33, "elapsed_time": "5:49:33", "remaining_time": "14:02:25"}
132
+ {"current_steps": 1100, "total_steps": 3751, "eval_loss": 0.27173659205436707, "epoch": 0.2932502457637003, "percentage": 29.33, "elapsed_time": "5:53:56", "remaining_time": "14:12:59"}
133
+ {"current_steps": 1110, "total_steps": 3751, "loss": 0.2731, "lr": 8.877662362382844e-06, "epoch": 0.29591615708882485, "percentage": 29.59, "elapsed_time": "5:56:14", "remaining_time": "14:07:36"}
134
+ {"current_steps": 1120, "total_steps": 3751, "loss": 0.2669, "lr": 8.848112400996473e-06, "epoch": 0.2985820684139494, "percentage": 29.86, "elapsed_time": "5:58:38", "remaining_time": "14:02:29"}
135
+ {"current_steps": 1130, "total_steps": 3751, "loss": 0.274, "lr": 8.818229015650862e-06, "epoch": 0.3012479797390739, "percentage": 30.13, "elapsed_time": "6:00:59", "remaining_time": "13:57:18"}
136
+ {"current_steps": 1140, "total_steps": 3751, "loss": 0.275, "lr": 8.788014795625018e-06, "epoch": 0.30391389106419847, "percentage": 30.39, "elapsed_time": "6:03:19", "remaining_time": "13:52:07"}
137
+ {"current_steps": 1150, "total_steps": 3751, "loss": 0.27, "lr": 8.757472358863481e-06, "epoch": 0.30657980238932303, "percentage": 30.66, "elapsed_time": "6:05:37", "remaining_time": "13:46:57"}
138
+ {"current_steps": 1150, "total_steps": 3751, "eval_loss": 0.2712327241897583, "epoch": 0.30657980238932303, "percentage": 30.66, "elapsed_time": "6:10:00", "remaining_time": "13:56:52"}
139
+ {"current_steps": 1160, "total_steps": 3751, "loss": 0.2676, "lr": 8.726604351749503e-06, "epoch": 0.3092457137144476, "percentage": 30.93, "elapsed_time": "6:12:19", "remaining_time": "13:51:37"}
140
+ {"current_steps": 1170, "total_steps": 3751, "loss": 0.2779, "lr": 8.69541344887573e-06, "epoch": 0.3119116250395721, "percentage": 31.19, "elapsed_time": "6:14:38", "remaining_time": "13:46:26"}
141
+ {"current_steps": 1180, "total_steps": 3751, "loss": 0.2693, "lr": 8.66390235281248e-06, "epoch": 0.31457753636469665, "percentage": 31.46, "elapsed_time": "6:16:55", "remaining_time": "13:41:13"}
142
+ {"current_steps": 1190, "total_steps": 3751, "loss": 0.2709, "lr": 8.632073793873548e-06, "epoch": 0.3172434476898212, "percentage": 31.72, "elapsed_time": "6:19:18", "remaining_time": "13:36:18"}
143
+ {"current_steps": 1200, "total_steps": 3751, "loss": 0.277, "lr": 8.599930529879669e-06, "epoch": 0.31990935901494577, "percentage": 31.99, "elapsed_time": "6:21:43", "remaining_time": "13:31:27"}
144
+ {"current_steps": 1200, "total_steps": 3751, "eval_loss": 0.2749159336090088, "epoch": 0.31990935901494577, "percentage": 31.99, "elapsed_time": "6:26:05", "remaining_time": "13:40:46"}
145
+ {"current_steps": 1210, "total_steps": 3751, "loss": 0.2629, "lr": 8.567475345919532e-06, "epoch": 0.32257527034007033, "percentage": 32.26, "elapsed_time": "6:28:27", "remaining_time": "13:35:46"}
146
+ {"current_steps": 1220, "total_steps": 3751, "loss": 0.2747, "lr": 8.534711054108487e-06, "epoch": 0.32524118166519483, "percentage": 32.52, "elapsed_time": "6:30:46", "remaining_time": "13:30:41"}
147
+ {"current_steps": 1230, "total_steps": 3751, "loss": 0.2644, "lr": 8.501640493344866e-06, "epoch": 0.3279070929903194, "percentage": 32.79, "elapsed_time": "6:33:02", "remaining_time": "13:25:34"}
148
+ {"current_steps": 1240, "total_steps": 3751, "loss": 0.263, "lr": 8.468266529064025e-06, "epoch": 0.33057300431544395, "percentage": 33.06, "elapsed_time": "6:35:17", "remaining_time": "13:20:28"}
149
+ {"current_steps": 1250, "total_steps": 3751, "loss": 0.2681, "lr": 8.434592052990044e-06, "epoch": 0.3332389156405685, "percentage": 33.32, "elapsed_time": "6:37:34", "remaining_time": "13:15:27"}
150
+ {"current_steps": 1250, "total_steps": 3751, "eval_loss": 0.2708764672279358, "epoch": 0.3332389156405685, "percentage": 33.32, "elapsed_time": "6:41:57", "remaining_time": "13:24:13"}
151
+ {"current_steps": 1260, "total_steps": 3751, "loss": 0.272, "lr": 8.400619982885183e-06, "epoch": 0.33590482696569307, "percentage": 33.59, "elapsed_time": "6:44:14", "remaining_time": "13:19:11"}
152
+ {"current_steps": 1270, "total_steps": 3751, "loss": 0.2713, "lr": 8.366353262297069e-06, "epoch": 0.3385707382908176, "percentage": 33.86, "elapsed_time": "6:46:34", "remaining_time": "13:14:16"}
153
+ {"current_steps": 1280, "total_steps": 3751, "loss": 0.2792, "lr": 8.331794860303644e-06, "epoch": 0.34123664961594213, "percentage": 34.12, "elapsed_time": "6:48:52", "remaining_time": "13:09:19"}
154
+ {"current_steps": 1290, "total_steps": 3751, "loss": 0.2628, "lr": 8.296947771255905e-06, "epoch": 0.3439025609410667, "percentage": 34.39, "elapsed_time": "6:51:10", "remaining_time": "13:04:24"}
155
+ {"current_steps": 1300, "total_steps": 3751, "loss": 0.2699, "lr": 8.261815014518465e-06, "epoch": 0.34656847226619125, "percentage": 34.66, "elapsed_time": "6:53:28", "remaining_time": "12:59:32"}
156
+ {"current_steps": 1300, "total_steps": 3751, "eval_loss": 0.27181142568588257, "epoch": 0.34656847226619125, "percentage": 34.66, "elapsed_time": "6:57:50", "remaining_time": "13:07:48"}
157
+ {"current_steps": 1310, "total_steps": 3751, "loss": 0.2734, "lr": 8.226399634207929e-06, "epoch": 0.3492343835913158, "percentage": 34.92, "elapsed_time": "7:00:10", "remaining_time": "13:02:56"}
158
+ {"current_steps": 1320, "total_steps": 3751, "loss": 0.2638, "lr": 8.190704698929128e-06, "epoch": 0.3519002949164403, "percentage": 35.19, "elapsed_time": "7:02:29", "remaining_time": "12:58:05"}
159
+ {"current_steps": 1330, "total_steps": 3751, "loss": 0.2595, "lr": 8.154733301509249e-06, "epoch": 0.3545662062415649, "percentage": 35.46, "elapsed_time": "7:04:48", "remaining_time": "12:53:15"}
160
+ {"current_steps": 1340, "total_steps": 3751, "loss": 0.2663, "lr": 8.118488558729846e-06, "epoch": 0.35723211756668943, "percentage": 35.72, "elapsed_time": "7:07:10", "remaining_time": "12:48:36"}
161
+ {"current_steps": 1350, "total_steps": 3751, "loss": 0.2682, "lr": 8.081973611056784e-06, "epoch": 0.359898028891814, "percentage": 35.99, "elapsed_time": "7:09:31", "remaining_time": "12:43:54"}
162
+ {"current_steps": 1350, "total_steps": 3751, "eval_loss": 0.267572283744812, "epoch": 0.359898028891814, "percentage": 35.99, "elapsed_time": "7:13:54", "remaining_time": "12:51:42"}
163
+ {"current_steps": 1360, "total_steps": 3751, "loss": 0.2593, "lr": 8.045191622368128e-06, "epoch": 0.36256394021693855, "percentage": 36.26, "elapsed_time": "7:16:12", "remaining_time": "12:46:53"}
164
+ {"current_steps": 1370, "total_steps": 3751, "loss": 0.2639, "lr": 8.008145779680011e-06, "epoch": 0.36522985154206306, "percentage": 36.52, "elapsed_time": "7:18:32", "remaining_time": "12:42:09"}
165
+ {"current_steps": 1380, "total_steps": 3751, "loss": 0.2669, "lr": 7.970839292870488e-06, "epoch": 0.3678957628671876, "percentage": 36.79, "elapsed_time": "7:20:47", "remaining_time": "12:37:20"}
166
+ {"current_steps": 1390, "total_steps": 3751, "loss": 0.2558, "lr": 7.933275394401407e-06, "epoch": 0.3705616741923122, "percentage": 37.06, "elapsed_time": "7:23:06", "remaining_time": "12:32:38"}
167
+ {"current_steps": 1400, "total_steps": 3751, "loss": 0.2668, "lr": 7.89545733903834e-06, "epoch": 0.37322758551743673, "percentage": 37.32, "elapsed_time": "7:25:22", "remaining_time": "12:27:54"}
168
+ {"current_steps": 1400, "total_steps": 3751, "eval_loss": 0.26621583104133606, "epoch": 0.37322758551743673, "percentage": 37.32, "elapsed_time": "7:29:45", "remaining_time": "12:35:16"}
169
+ {"current_steps": 1410, "total_steps": 3751, "loss": 0.2562, "lr": 7.857388403568564e-06, "epoch": 0.3758934968425613, "percentage": 37.59, "elapsed_time": "7:32:05", "remaining_time": "12:30:35"}
170
+ {"current_steps": 1420, "total_steps": 3751, "loss": 0.2641, "lr": 7.819071886517134e-06, "epoch": 0.3785594081676858, "percentage": 37.86, "elapsed_time": "7:34:23", "remaining_time": "12:25:55"}
171
+ {"current_steps": 1430, "total_steps": 3751, "loss": 0.2574, "lr": 7.780511107861095e-06, "epoch": 0.38122531949281036, "percentage": 38.12, "elapsed_time": "7:36:40", "remaining_time": "12:21:12"}
172
+ {"current_steps": 1440, "total_steps": 3751, "loss": 0.2523, "lr": 7.741709408741804e-06, "epoch": 0.3838912308179349, "percentage": 38.39, "elapsed_time": "7:38:57", "remaining_time": "12:16:34"}
173
+ {"current_steps": 1450, "total_steps": 3751, "loss": 0.2615, "lr": 7.702670151175435e-06, "epoch": 0.3865571421430595, "percentage": 38.66, "elapsed_time": "7:41:17", "remaining_time": "12:12:00"}
174
+ {"current_steps": 1450, "total_steps": 3751, "eval_loss": 0.26893940567970276, "epoch": 0.3865571421430595, "percentage": 38.66, "elapsed_time": "7:45:39", "remaining_time": "12:18:57"}
175
+ {"current_steps": 1460, "total_steps": 3751, "loss": 0.2598, "lr": 7.663396717761687e-06, "epoch": 0.38922305346818403, "percentage": 38.92, "elapsed_time": "7:48:00", "remaining_time": "12:14:23"}
176
+ {"current_steps": 1470, "total_steps": 3751, "loss": 0.2595, "lr": 7.6238925113906715e-06, "epoch": 0.39188896479330854, "percentage": 39.19, "elapsed_time": "7:50:15", "remaining_time": "12:09:42"}
177
+ {"current_steps": 1480, "total_steps": 3751, "loss": 0.2706, "lr": 7.5841609549480854e-06, "epoch": 0.3945548761184331, "percentage": 39.46, "elapsed_time": "7:52:32", "remaining_time": "12:05:06"}
178
+ {"current_steps": 1490, "total_steps": 3751, "loss": 0.252, "lr": 7.544205491018626e-06, "epoch": 0.39722078744355765, "percentage": 39.72, "elapsed_time": "7:54:54", "remaining_time": "12:00:38"}
179
+ {"current_steps": 1500, "total_steps": 3751, "loss": 0.2501, "lr": 7.5040295815877e-06, "epoch": 0.3998866987686822, "percentage": 39.99, "elapsed_time": "7:57:13", "remaining_time": "11:56:09"}
180
+ {"current_steps": 1500, "total_steps": 3751, "eval_loss": 0.2582685649394989, "epoch": 0.3998866987686822, "percentage": 39.99, "elapsed_time": "8:01:36", "remaining_time": "12:02:44"}
181
+ {"current_steps": 1510, "total_steps": 3751, "loss": 0.2604, "lr": 7.463636707741458e-06, "epoch": 0.4025526100938068, "percentage": 40.26, "elapsed_time": "8:04:56", "remaining_time": "11:59:42"}
182
+ {"current_steps": 1520, "total_steps": 3751, "loss": 0.264, "lr": 7.423030369365175e-06, "epoch": 0.4052185214189313, "percentage": 40.52, "elapsed_time": "8:07:14", "remaining_time": "11:55:09"}