File size: 7,726 Bytes
2dba4f3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9999921630708223,
  "eval_steps": 700,
  "global_step": 7975,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.08777360678991544,
      "grad_norm": 15.821257591247559,
      "learning_rate": 9.179810725552051e-05,
      "loss": 4.3692,
      "step": 700
    },
    {
      "epoch": 0.08777360678991544,
      "eval_loss": 0.2736443877220154,
      "eval_mae": 0.39691290259361267,
      "eval_mse": 0.2736443877220154,
      "eval_pearson_r": 0.5351819396018982,
      "eval_r2": 0.2831069827079773,
      "eval_rmse": 0.5231103301048279,
      "eval_runtime": 633.0966,
      "eval_samples_per_second": 19.348,
      "eval_spearman_r": 0.5498853126880646,
      "eval_steps_per_second": 2.42,
      "step": 700
    },
    {
      "epoch": 0.17554721357983089,
      "grad_norm": 12.040508270263672,
      "learning_rate": 8.296529968454258e-05,
      "loss": 3.658,
      "step": 1400
    },
    {
      "epoch": 0.17554721357983089,
      "eval_loss": 0.25718629360198975,
      "eval_mae": 0.3839031159877777,
      "eval_mse": 0.25718629360198975,
      "eval_pearson_r": 0.5725416541099548,
      "eval_r2": 0.32622385025024414,
      "eval_rmse": 0.5071353912353516,
      "eval_runtime": 627.8848,
      "eval_samples_per_second": 19.508,
      "eval_spearman_r": 0.5926255817362731,
      "eval_steps_per_second": 2.44,
      "step": 1400
    },
    {
      "epoch": 0.2633208203697463,
      "grad_norm": 30.66448974609375,
      "learning_rate": 7.413249211356468e-05,
      "loss": 3.4843,
      "step": 2100
    },
    {
      "epoch": 0.2633208203697463,
      "eval_loss": 0.26160550117492676,
      "eval_mae": 0.3920997679233551,
      "eval_mse": 0.26160547137260437,
      "eval_pearson_r": 0.5790643692016602,
      "eval_r2": 0.31464648246765137,
      "eval_rmse": 0.5114738345146179,
      "eval_runtime": 628.627,
      "eval_samples_per_second": 19.485,
      "eval_spearman_r": 0.5891729768312147,
      "eval_steps_per_second": 2.437,
      "step": 2100
    },
    {
      "epoch": 0.35109442715966177,
      "grad_norm": 18.40130043029785,
      "learning_rate": 6.529968454258676e-05,
      "loss": 3.4036,
      "step": 2800
    },
    {
      "epoch": 0.35109442715966177,
      "eval_loss": 0.2504981458187103,
      "eval_mae": 0.378262460231781,
      "eval_mse": 0.2504981458187103,
      "eval_pearson_r": 0.5949272513389587,
      "eval_r2": 0.34374547004699707,
      "eval_rmse": 0.5004978775978088,
      "eval_runtime": 628.0494,
      "eval_samples_per_second": 19.503,
      "eval_spearman_r": 0.6035960603894734,
      "eval_steps_per_second": 2.439,
      "step": 2800
    },
    {
      "epoch": 0.4388680339495772,
      "grad_norm": 11.223097801208496,
      "learning_rate": 5.646687697160884e-05,
      "loss": 3.2658,
      "step": 3500
    },
    {
      "epoch": 0.4388680339495772,
      "eval_loss": 0.2516283392906189,
      "eval_mae": 0.3826421797275543,
      "eval_mse": 0.2516283392906189,
      "eval_pearson_r": 0.6112059354782104,
      "eval_r2": 0.34078454971313477,
      "eval_rmse": 0.5016257166862488,
      "eval_runtime": 628.4233,
      "eval_samples_per_second": 19.492,
      "eval_spearman_r": 0.6174392220462328,
      "eval_steps_per_second": 2.438,
      "step": 3500
    },
    {
      "epoch": 0.5266416407394926,
      "grad_norm": 34.326786041259766,
      "learning_rate": 4.763406940063092e-05,
      "loss": 3.1724,
      "step": 4200
    },
    {
      "epoch": 0.5266416407394926,
      "eval_loss": 0.24652785062789917,
      "eval_mae": 0.37860819697380066,
      "eval_mse": 0.24652785062789917,
      "eval_pearson_r": 0.6118167638778687,
      "eval_r2": 0.3541467785835266,
      "eval_rmse": 0.4965157210826874,
      "eval_runtime": 628.1591,
      "eval_samples_per_second": 19.5,
      "eval_spearman_r": 0.6173150995319716,
      "eval_steps_per_second": 2.439,
      "step": 4200
    },
    {
      "epoch": 0.6144152475294081,
      "grad_norm": 7.161226272583008,
      "learning_rate": 3.8801261829652994e-05,
      "loss": 3.1014,
      "step": 4900
    },
    {
      "epoch": 0.6144152475294081,
      "eval_loss": 0.2447061538696289,
      "eval_mae": 0.3736642599105835,
      "eval_mse": 0.2447061538696289,
      "eval_pearson_r": 0.6133831739425659,
      "eval_r2": 0.35891926288604736,
      "eval_rmse": 0.4946778416633606,
      "eval_runtime": 630.4288,
      "eval_samples_per_second": 19.43,
      "eval_spearman_r": 0.6261022939088873,
      "eval_steps_per_second": 2.43,
      "step": 4900
    },
    {
      "epoch": 0.7021888543193235,
      "grad_norm": 36.81498718261719,
      "learning_rate": 2.9968454258675084e-05,
      "loss": 3.0568,
      "step": 5600
    },
    {
      "epoch": 0.7021888543193235,
      "eval_loss": 0.23911960422992706,
      "eval_mae": 0.37206539511680603,
      "eval_mse": 0.23911964893341064,
      "eval_pearson_r": 0.6180469989776611,
      "eval_r2": 0.3735548257827759,
      "eval_rmse": 0.4889986217021942,
      "eval_runtime": 628.247,
      "eval_samples_per_second": 19.497,
      "eval_spearman_r": 0.620481976822377,
      "eval_steps_per_second": 2.439,
      "step": 5600
    },
    {
      "epoch": 0.789962461109239,
      "grad_norm": 8.716201782226562,
      "learning_rate": 2.113564668769716e-05,
      "loss": 3.0154,
      "step": 6300
    },
    {
      "epoch": 0.789962461109239,
      "eval_loss": 0.24396386742591858,
      "eval_mae": 0.37591251730918884,
      "eval_mse": 0.24396386742591858,
      "eval_pearson_r": 0.6165634989738464,
      "eval_r2": 0.36086392402648926,
      "eval_rmse": 0.4939269721508026,
      "eval_runtime": 628.8587,
      "eval_samples_per_second": 19.478,
      "eval_spearman_r": 0.6244525948547685,
      "eval_steps_per_second": 2.436,
      "step": 6300
    },
    {
      "epoch": 0.8777360678991544,
      "grad_norm": 10.887434005737305,
      "learning_rate": 1.2302839116719243e-05,
      "loss": 2.939,
      "step": 7000
    },
    {
      "epoch": 0.8777360678991544,
      "eval_loss": 0.24175813794136047,
      "eval_mae": 0.3743629455566406,
      "eval_mse": 0.24175813794136047,
      "eval_pearson_r": 0.6214854717254639,
      "eval_r2": 0.3666425347328186,
      "eval_rmse": 0.4916890561580658,
      "eval_runtime": 628.0862,
      "eval_samples_per_second": 19.502,
      "eval_spearman_r": 0.6260799567516182,
      "eval_steps_per_second": 2.439,
      "step": 7000
    },
    {
      "epoch": 0.9655096746890698,
      "grad_norm": 11.310564994812012,
      "learning_rate": 3.470031545741325e-06,
      "loss": 2.9347,
      "step": 7700
    },
    {
      "epoch": 0.9655096746890698,
      "eval_loss": 0.24253901839256287,
      "eval_mae": 0.3738330006599426,
      "eval_mse": 0.24253901839256287,
      "eval_pearson_r": 0.622931182384491,
      "eval_r2": 0.36459678411483765,
      "eval_rmse": 0.4924825131893158,
      "eval_runtime": 629.0849,
      "eval_samples_per_second": 19.471,
      "eval_spearman_r": 0.6275805173207804,
      "eval_steps_per_second": 2.435,
      "step": 7700
    }
  ],
  "logging_steps": 700,
  "max_steps": 7975,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 700,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.751589692941271e+19,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}