aleegis commited on
Commit
af57644
·
verified ·
1 Parent(s): 1b550c6

Training in progress, step 1800, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:38ca30b0a85dcd68fc0dbccc62f56ab8c9f5433e437fc9a0602109942006fbe4
3
  size 101752088
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90e27c540ff144b90538ab4cacac709849d64f98215dc906cc8c8baa494d1e74
3
  size 101752088
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9eb0b3fdfdb8d78958652fd6819c6675c3094960a2dc507442cf2e610af75681
3
  size 203719079
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:593a6dd089fbf47d216b98bdf9aaa1ff9d60e7d4765ca76f9bdee22e5bd99278
3
  size 203719079
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6692322f7c6e3bc68ee2a89437733e388ea9cbf749c02b54be14347a06832463
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e32012078917f404c8f6386ecc9d3ec9e72ffb254583698b16b344fb93bcee60
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:60267096a85446a4c770b4624a4ae7a18d27516875db8612976011f09d3850bc
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b2c2a41069cae9d886e84130c34f3e080d925a2566cc57a8f9380b6563f44c6
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.2376049421827974,
6
  "eval_steps": 500,
7
- "global_step": 1500,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1058,6 +1058,216 @@
1058
  "learning_rate": 5.873969678568784e-05,
1059
  "loss": 1.3405,
1060
  "step": 1500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1061
  }
1062
  ],
1063
  "logging_steps": 10,
@@ -1077,7 +1287,7 @@
1077
  "attributes": {}
1078
  }
1079
  },
1080
- "total_flos": 4.9390380122112e+17,
1081
  "train_batch_size": 16,
1082
  "trial_name": null,
1083
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.28512593061935687,
6
  "eval_steps": 500,
7
+ "global_step": 1800,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1058
  "learning_rate": 5.873969678568784e-05,
1059
  "loss": 1.3405,
1060
  "step": 1500
1061
+ },
1062
+ {
1063
+ "epoch": 0.2391889751306827,
1064
+ "grad_norm": 0.8088416457176208,
1065
+ "learning_rate": 5.816629811208112e-05,
1066
+ "loss": 1.3383,
1067
+ "step": 1510
1068
+ },
1069
+ {
1070
+ "epoch": 0.24077300807856802,
1071
+ "grad_norm": 0.7820030450820923,
1072
+ "learning_rate": 5.759179385243224e-05,
1073
+ "loss": 1.4342,
1074
+ "step": 1520
1075
+ },
1076
+ {
1077
+ "epoch": 0.24235704102645336,
1078
+ "grad_norm": 0.7880681157112122,
1079
+ "learning_rate": 5.701626178542158e-05,
1080
+ "loss": 1.2435,
1081
+ "step": 1530
1082
+ },
1083
+ {
1084
+ "epoch": 0.24394107397433867,
1085
+ "grad_norm": 0.7700749635696411,
1086
+ "learning_rate": 5.643977982887815e-05,
1087
+ "loss": 1.3295,
1088
+ "step": 1540
1089
+ },
1090
+ {
1091
+ "epoch": 0.24552510692222398,
1092
+ "grad_norm": 0.8325817584991455,
1093
+ "learning_rate": 5.586242602923081e-05,
1094
+ "loss": 1.3995,
1095
+ "step": 1550
1096
+ },
1097
+ {
1098
+ "epoch": 0.2471091398701093,
1099
+ "grad_norm": 0.9384058117866516,
1100
+ "learning_rate": 5.528427855094206e-05,
1101
+ "loss": 1.4681,
1102
+ "step": 1560
1103
+ },
1104
+ {
1105
+ "epoch": 0.2486931728179946,
1106
+ "grad_norm": 0.8473599553108215,
1107
+ "learning_rate": 5.470541566592573e-05,
1108
+ "loss": 1.4149,
1109
+ "step": 1570
1110
+ },
1111
+ {
1112
+ "epoch": 0.2502772057658799,
1113
+ "grad_norm": 0.8693270683288574,
1114
+ "learning_rate": 5.4125915742950275e-05,
1115
+ "loss": 1.3144,
1116
+ "step": 1580
1117
+ },
1118
+ {
1119
+ "epoch": 0.25186123871376526,
1120
+ "grad_norm": 0.9569868445396423,
1121
+ "learning_rate": 5.354585723702893e-05,
1122
+ "loss": 1.4269,
1123
+ "step": 1590
1124
+ },
1125
+ {
1126
+ "epoch": 0.25344527166165054,
1127
+ "grad_norm": 1.0059610605239868,
1128
+ "learning_rate": 5.296531867879809e-05,
1129
+ "loss": 1.404,
1130
+ "step": 1600
1131
+ },
1132
+ {
1133
+ "epoch": 0.2550293046095359,
1134
+ "grad_norm": 0.7387624979019165,
1135
+ "learning_rate": 5.2384378663885545e-05,
1136
+ "loss": 1.3275,
1137
+ "step": 1610
1138
+ },
1139
+ {
1140
+ "epoch": 0.2566133375574212,
1141
+ "grad_norm": 0.7581918239593506,
1142
+ "learning_rate": 5.180311584226991e-05,
1143
+ "loss": 1.3166,
1144
+ "step": 1620
1145
+ },
1146
+ {
1147
+ "epoch": 0.2581973705053065,
1148
+ "grad_norm": 0.9148341417312622,
1149
+ "learning_rate": 5.1221608907632665e-05,
1150
+ "loss": 1.3702,
1151
+ "step": 1630
1152
+ },
1153
+ {
1154
+ "epoch": 0.25978140345319184,
1155
+ "grad_norm": 0.7988713383674622,
1156
+ "learning_rate": 5.063993658670425e-05,
1157
+ "loss": 1.2571,
1158
+ "step": 1640
1159
+ },
1160
+ {
1161
+ "epoch": 0.2613654364010771,
1162
+ "grad_norm": 0.6659321188926697,
1163
+ "learning_rate": 5.0058177628605795e-05,
1164
+ "loss": 1.345,
1165
+ "step": 1650
1166
+ },
1167
+ {
1168
+ "epoch": 0.26294946934896246,
1169
+ "grad_norm": 0.9008516073226929,
1170
+ "learning_rate": 4.947641079418773e-05,
1171
+ "loss": 1.3583,
1172
+ "step": 1660
1173
+ },
1174
+ {
1175
+ "epoch": 0.2645335022968478,
1176
+ "grad_norm": 0.8323536515235901,
1177
+ "learning_rate": 4.889471484536672e-05,
1178
+ "loss": 1.3591,
1179
+ "step": 1670
1180
+ },
1181
+ {
1182
+ "epoch": 0.2661175352447331,
1183
+ "grad_norm": 0.7164818644523621,
1184
+ "learning_rate": 4.83131685344628e-05,
1185
+ "loss": 1.3439,
1186
+ "step": 1680
1187
+ },
1188
+ {
1189
+ "epoch": 0.2677015681926184,
1190
+ "grad_norm": 0.7881910800933838,
1191
+ "learning_rate": 4.773185059353732e-05,
1192
+ "loss": 1.3651,
1193
+ "step": 1690
1194
+ },
1195
+ {
1196
+ "epoch": 0.2692856011405037,
1197
+ "grad_norm": 0.9037622213363647,
1198
+ "learning_rate": 4.715083972373401e-05,
1199
+ "loss": 1.3871,
1200
+ "step": 1700
1201
+ },
1202
+ {
1203
+ "epoch": 0.27086963408838904,
1204
+ "grad_norm": 0.8563185930252075,
1205
+ "learning_rate": 4.657021458462409e-05,
1206
+ "loss": 1.4886,
1207
+ "step": 1710
1208
+ },
1209
+ {
1210
+ "epoch": 0.2724536670362743,
1211
+ "grad_norm": 0.8638001084327698,
1212
+ "learning_rate": 4.599005378355706e-05,
1213
+ "loss": 1.4762,
1214
+ "step": 1720
1215
+ },
1216
+ {
1217
+ "epoch": 0.27403769998415967,
1218
+ "grad_norm": 0.7504866123199463,
1219
+ "learning_rate": 4.541043586501842e-05,
1220
+ "loss": 1.2971,
1221
+ "step": 1730
1222
+ },
1223
+ {
1224
+ "epoch": 0.275621732932045,
1225
+ "grad_norm": 0.786354660987854,
1226
+ "learning_rate": 4.4831439299996084e-05,
1227
+ "loss": 1.3203,
1228
+ "step": 1740
1229
+ },
1230
+ {
1231
+ "epoch": 0.2772057658799303,
1232
+ "grad_norm": 0.7911379337310791,
1233
+ "learning_rate": 4.425314247535668e-05,
1234
+ "loss": 1.3526,
1235
+ "step": 1750
1236
+ },
1237
+ {
1238
+ "epoch": 0.2787897988278156,
1239
+ "grad_norm": 0.9542713165283203,
1240
+ "learning_rate": 4.3675623683233135e-05,
1241
+ "loss": 1.2415,
1242
+ "step": 1760
1243
+ },
1244
+ {
1245
+ "epoch": 0.2803738317757009,
1246
+ "grad_norm": 0.7208961844444275,
1247
+ "learning_rate": 4.309896111042529e-05,
1248
+ "loss": 1.3313,
1249
+ "step": 1770
1250
+ },
1251
+ {
1252
+ "epoch": 0.28195786472358625,
1253
+ "grad_norm": 0.8950613141059875,
1254
+ "learning_rate": 4.252323282781453e-05,
1255
+ "loss": 1.3802,
1256
+ "step": 1780
1257
+ },
1258
+ {
1259
+ "epoch": 0.2835418976714716,
1260
+ "grad_norm": 0.7467291951179504,
1261
+ "learning_rate": 4.1948516779794364e-05,
1262
+ "loss": 1.4616,
1263
+ "step": 1790
1264
+ },
1265
+ {
1266
+ "epoch": 0.28512593061935687,
1267
+ "grad_norm": 0.9284554719924927,
1268
+ "learning_rate": 4.137489077371787e-05,
1269
+ "loss": 1.3483,
1270
+ "step": 1800
1271
  }
1272
  ],
1273
  "logging_steps": 10,
 
1287
  "attributes": {}
1288
  }
1289
  },
1290
+ "total_flos": 5.92684561465344e+17,
1291
  "train_batch_size": 16,
1292
  "trial_name": null,
1293
  "trial_params": null