bombshelll committed on
Commit
18438fb
·
verified ·
1 Parent(s): 349b6ff

End of training

Browse files
README.md CHANGED
@@ -18,8 +18,8 @@ should probably proofread and complete it, then remove this comment. -->
18
 
19
  This model is a fine-tuned version of [microsoft/swin-tiny-patch4-window7-224](https://huggingface.co/microsoft/swin-tiny-patch4-window7-224) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
- - Loss: 0.0620
22
- - Accuracy: 0.9778
23
 
24
  ## Model description
25
 
 
18
 
19
  This model is a fine-tuned version of [microsoft/swin-tiny-patch4-window7-224](https://huggingface.co/microsoft/swin-tiny-patch4-window7-224) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
+ - Loss: 0.0472
22
+ - Accuracy: 0.9852
23
 
24
  ## Model description
25
 
all_results.json CHANGED
@@ -1,8 +1,13 @@
1
  {
2
- "epoch": 8.0,
3
- "total_flos": 3.121999450369229e+16,
4
- "train_loss": 0.3916594386100769,
5
- "train_runtime": 22.6465,
6
- "train_samples_per_second": 69.326,
7
- "train_steps_per_second": 0.442
 
 
 
 
 
8
  }
 
1
  {
2
+ "epoch": 10.0,
3
+ "eval_accuracy": 0.9851851851851852,
4
+ "eval_loss": 0.04721328616142273,
5
+ "eval_runtime": 0.7379,
6
+ "eval_samples_per_second": 182.957,
7
+ "eval_steps_per_second": 6.776,
8
+ "total_flos": 1.565971061889024e+17,
9
+ "train_loss": 0.394845541715622,
10
+ "train_runtime": 86.4231,
11
+ "train_samples_per_second": 72.897,
12
+ "train_steps_per_second": 0.579
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 8.0,
3
- "eval_accuracy": 0.9090909090909091,
4
- "eval_loss": 0.5003632307052612,
5
- "eval_runtime": 0.1947,
6
- "eval_samples_per_second": 169.491,
7
- "eval_steps_per_second": 10.272
8
  }
 
1
  {
2
+ "epoch": 10.0,
3
+ "eval_accuracy": 0.9851851851851852,
4
+ "eval_loss": 0.05875137820839882,
5
+ "eval_runtime": 0.7872,
6
+ "eval_samples_per_second": 171.496,
7
+ "eval_steps_per_second": 6.352
8
  }
test_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 8.0,
3
- "eval_accuracy": 0.8484848484848485,
4
- "eval_loss": 0.5320981740951538,
5
- "eval_runtime": 0.1987,
6
- "eval_samples_per_second": 166.048,
7
- "eval_steps_per_second": 10.064
8
  }
 
1
  {
2
+ "epoch": 10.0,
3
+ "eval_accuracy": 0.9851851851851852,
4
+ "eval_loss": 0.04721328616142273,
5
+ "eval_runtime": 0.7379,
6
+ "eval_samples_per_second": 182.957,
7
+ "eval_steps_per_second": 6.776
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 8.0,
3
- "total_flos": 3.121999450369229e+16,
4
- "train_loss": 0.3916594386100769,
5
- "train_runtime": 22.6465,
6
- "train_samples_per_second": 69.326,
7
- "train_steps_per_second": 0.442
8
  }
 
1
  {
2
+ "epoch": 10.0,
3
+ "total_flos": 1.565971061889024e+17,
4
+ "train_loss": 0.394845541715622,
5
+ "train_runtime": 86.4231,
6
+ "train_samples_per_second": 72.897,
7
+ "train_steps_per_second": 0.579
8
  }
trainer_state.json CHANGED
@@ -1,153 +1,185 @@
1
  {
2
- "best_metric": 0.9696969696969697,
3
- "best_model_checkpoint": "/kaggle/working/swin-brain-plane-classification/checkpoint-6",
4
- "epoch": 8.0,
5
  "eval_steps": 500,
6
- "global_step": 10,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.8,
13
- "grad_norm": 4.3462605476379395,
14
  "learning_rate": 5e-05,
15
- "loss": 0.7819,
16
- "step": 1
17
  },
18
  {
19
- "epoch": 0.8,
20
- "eval_accuracy": 0.8484848484848485,
21
- "eval_loss": 0.5965157151222229,
22
- "eval_runtime": 0.2012,
23
- "eval_samples_per_second": 164.047,
24
- "eval_steps_per_second": 9.942,
25
- "step": 1
26
  },
27
  {
28
- "epoch": 1.6,
29
- "grad_norm": 4.70327091217041,
30
  "learning_rate": 4.4444444444444447e-05,
31
- "loss": 0.689,
32
- "step": 2
33
  },
34
  {
35
- "epoch": 1.6,
36
- "eval_accuracy": 0.8484848484848485,
37
- "eval_loss": 0.5047040581703186,
38
- "eval_runtime": 0.1988,
39
- "eval_samples_per_second": 165.985,
40
- "eval_steps_per_second": 10.06,
41
- "step": 2
42
  },
43
  {
44
- "epoch": 2.4,
45
- "grad_norm": 5.389795780181885,
46
  "learning_rate": 3.888888888888889e-05,
47
- "loss": 0.58,
48
- "step": 3
49
  },
50
  {
51
- "epoch": 2.4,
52
- "eval_accuracy": 0.9090909090909091,
53
- "eval_loss": 0.3954509198665619,
54
- "eval_runtime": 0.196,
55
- "eval_samples_per_second": 168.338,
56
- "eval_steps_per_second": 10.202,
57
- "step": 3
58
  },
59
  {
60
  "epoch": 4.0,
61
- "grad_norm": 4.907708644866943,
62
- "learning_rate": 2.777777777777778e-05,
63
- "loss": 0.2252,
64
- "step": 5
65
  },
66
  {
67
  "epoch": 4.0,
68
- "eval_accuracy": 0.9393939393939394,
69
- "eval_loss": 0.2680495083332062,
70
- "eval_runtime": 0.2061,
71
- "eval_samples_per_second": 160.109,
72
- "eval_steps_per_second": 9.704,
73
- "step": 5
74
  },
75
  {
76
- "epoch": 4.8,
77
- "grad_norm": 6.3704328536987305,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  "learning_rate": 2.2222222222222223e-05,
79
- "loss": 0.3826,
80
- "step": 6
81
  },
82
  {
83
- "epoch": 4.8,
84
- "eval_accuracy": 0.9696969696969697,
85
- "eval_loss": 0.2426709532737732,
86
- "eval_runtime": 0.2047,
87
- "eval_samples_per_second": 161.236,
88
- "eval_steps_per_second": 9.772,
89
- "step": 6
90
  },
91
  {
92
- "epoch": 5.6,
93
- "grad_norm": 5.068804740905762,
94
  "learning_rate": 1.6666666666666667e-05,
95
- "loss": 0.3551,
96
- "step": 7
97
  },
98
  {
99
- "epoch": 5.6,
100
- "eval_accuracy": 0.9090909090909091,
101
- "eval_loss": 0.22610554099082947,
102
- "eval_runtime": 0.2073,
103
- "eval_samples_per_second": 159.187,
104
- "eval_steps_per_second": 9.648,
105
- "step": 7
106
  },
107
  {
108
- "epoch": 6.4,
109
- "grad_norm": 3.142571449279785,
110
  "learning_rate": 1.1111111111111112e-05,
111
- "loss": 0.3171,
112
- "step": 8
113
  },
114
  {
115
- "epoch": 6.4,
116
- "eval_accuracy": 0.9393939393939394,
117
- "eval_loss": 0.21792414784431458,
118
- "eval_runtime": 0.1962,
119
- "eval_samples_per_second": 168.154,
120
- "eval_steps_per_second": 10.191,
121
- "step": 8
122
  },
123
  {
124
- "epoch": 8.0,
125
- "grad_norm": 4.538871765136719,
126
- "learning_rate": 0.0,
127
- "loss": 0.1802,
128
- "step": 10
129
  },
130
  {
131
- "epoch": 8.0,
132
- "eval_accuracy": 0.9393939393939394,
133
- "eval_loss": 0.21566466987133026,
134
- "eval_runtime": 0.219,
135
- "eval_samples_per_second": 150.655,
136
- "eval_steps_per_second": 9.131,
137
- "step": 10
138
  },
139
  {
140
- "epoch": 8.0,
141
- "step": 10,
142
- "total_flos": 3.121999450369229e+16,
143
- "train_loss": 0.3916594386100769,
144
- "train_runtime": 22.6465,
145
- "train_samples_per_second": 69.326,
146
- "train_steps_per_second": 0.442
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
  }
148
  ],
149
  "logging_steps": 1,
150
- "max_steps": 10,
151
  "num_input_tokens_seen": 0,
152
  "num_train_epochs": 10,
153
  "save_steps": 500,
@@ -163,7 +195,7 @@
163
  "attributes": {}
164
  }
165
  },
166
- "total_flos": 3.121999450369229e+16,
167
  "train_batch_size": 32,
168
  "trial_name": null,
169
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.9851851851851852,
3
+ "best_model_checkpoint": "/kaggle/working/swin-brain-plane-classification/checkpoint-45",
4
+ "epoch": 10.0,
5
  "eval_steps": 500,
6
+ "global_step": 50,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 1.0,
13
+ "grad_norm": 5.199264049530029,
14
  "learning_rate": 5e-05,
15
+ "loss": 1.1833,
16
+ "step": 5
17
  },
18
  {
19
+ "epoch": 1.0,
20
+ "eval_accuracy": 0.6074074074074074,
21
+ "eval_loss": 0.9675570130348206,
22
+ "eval_runtime": 0.7682,
23
+ "eval_samples_per_second": 175.727,
24
+ "eval_steps_per_second": 6.508,
25
+ "step": 5
26
  },
27
  {
28
+ "epoch": 2.0,
29
+ "grad_norm": 5.081261157989502,
30
  "learning_rate": 4.4444444444444447e-05,
31
+ "loss": 0.8399,
32
+ "step": 10
33
  },
34
  {
35
+ "epoch": 2.0,
36
+ "eval_accuracy": 0.8592592592592593,
37
+ "eval_loss": 0.5346508026123047,
38
+ "eval_runtime": 0.7504,
39
+ "eval_samples_per_second": 179.9,
40
+ "eval_steps_per_second": 6.663,
41
+ "step": 10
42
  },
43
  {
44
+ "epoch": 3.0,
45
+ "grad_norm": 7.020309925079346,
46
  "learning_rate": 3.888888888888889e-05,
47
+ "loss": 0.5341,
48
+ "step": 15
49
  },
50
  {
51
+ "epoch": 3.0,
52
+ "eval_accuracy": 0.9481481481481482,
53
+ "eval_loss": 0.2379625141620636,
54
+ "eval_runtime": 0.7591,
55
+ "eval_samples_per_second": 177.839,
56
+ "eval_steps_per_second": 6.587,
57
+ "step": 15
58
  },
59
  {
60
  "epoch": 4.0,
61
+ "grad_norm": 9.26108455657959,
62
+ "learning_rate": 3.3333333333333335e-05,
63
+ "loss": 0.3583,
64
+ "step": 20
65
  },
66
  {
67
  "epoch": 4.0,
68
+ "eval_accuracy": 0.9629629629629629,
69
+ "eval_loss": 0.13244035840034485,
70
+ "eval_runtime": 0.7541,
71
+ "eval_samples_per_second": 179.024,
72
+ "eval_steps_per_second": 6.631,
73
+ "step": 20
74
  },
75
  {
76
+ "epoch": 5.0,
77
+ "grad_norm": 5.511073112487793,
78
+ "learning_rate": 2.777777777777778e-05,
79
+ "loss": 0.2197,
80
+ "step": 25
81
+ },
82
+ {
83
+ "epoch": 5.0,
84
+ "eval_accuracy": 0.9555555555555556,
85
+ "eval_loss": 0.1311105489730835,
86
+ "eval_runtime": 0.7578,
87
+ "eval_samples_per_second": 178.154,
88
+ "eval_steps_per_second": 6.598,
89
+ "step": 25
90
+ },
91
+ {
92
+ "epoch": 6.0,
93
+ "grad_norm": 9.359658241271973,
94
  "learning_rate": 2.2222222222222223e-05,
95
+ "loss": 0.1995,
96
+ "step": 30
97
  },
98
  {
99
+ "epoch": 6.0,
100
+ "eval_accuracy": 0.9777777777777777,
101
+ "eval_loss": 0.08695662766695023,
102
+ "eval_runtime": 0.7501,
103
+ "eval_samples_per_second": 179.972,
104
+ "eval_steps_per_second": 6.666,
105
+ "step": 30
106
  },
107
  {
108
+ "epoch": 7.0,
109
+ "grad_norm": 5.365365028381348,
110
  "learning_rate": 1.6666666666666667e-05,
111
+ "loss": 0.1485,
112
+ "step": 35
113
  },
114
  {
115
+ "epoch": 7.0,
116
+ "eval_accuracy": 0.9777777777777777,
117
+ "eval_loss": 0.11073008179664612,
118
+ "eval_runtime": 0.7524,
119
+ "eval_samples_per_second": 179.431,
120
+ "eval_steps_per_second": 6.646,
121
+ "step": 35
122
  },
123
  {
124
+ "epoch": 8.0,
125
+ "grad_norm": 5.843384265899658,
126
  "learning_rate": 1.1111111111111112e-05,
127
+ "loss": 0.1689,
128
+ "step": 40
129
  },
130
  {
131
+ "epoch": 8.0,
132
+ "eval_accuracy": 0.9777777777777777,
133
+ "eval_loss": 0.07976409047842026,
134
+ "eval_runtime": 0.7575,
135
+ "eval_samples_per_second": 178.206,
136
+ "eval_steps_per_second": 6.6,
137
+ "step": 40
138
  },
139
  {
140
+ "epoch": 9.0,
141
+ "grad_norm": 4.395670413970947,
142
+ "learning_rate": 5.555555555555556e-06,
143
+ "loss": 0.1339,
144
+ "step": 45
145
  },
146
  {
147
+ "epoch": 9.0,
148
+ "eval_accuracy": 0.9851851851851852,
149
+ "eval_loss": 0.05875137820839882,
150
+ "eval_runtime": 0.7546,
151
+ "eval_samples_per_second": 178.893,
152
+ "eval_steps_per_second": 6.626,
153
+ "step": 45
154
  },
155
  {
156
+ "epoch": 10.0,
157
+ "grad_norm": 7.337897777557373,
158
+ "learning_rate": 0.0,
159
+ "loss": 0.1623,
160
+ "step": 50
161
+ },
162
+ {
163
+ "epoch": 10.0,
164
+ "eval_accuracy": 0.9777777777777777,
165
+ "eval_loss": 0.06197558343410492,
166
+ "eval_runtime": 0.7834,
167
+ "eval_samples_per_second": 172.335,
168
+ "eval_steps_per_second": 6.383,
169
+ "step": 50
170
+ },
171
+ {
172
+ "epoch": 10.0,
173
+ "step": 50,
174
+ "total_flos": 1.565971061889024e+17,
175
+ "train_loss": 0.394845541715622,
176
+ "train_runtime": 86.4231,
177
+ "train_samples_per_second": 72.897,
178
+ "train_steps_per_second": 0.579
179
  }
180
  ],
181
  "logging_steps": 1,
182
+ "max_steps": 50,
183
  "num_input_tokens_seen": 0,
184
  "num_train_epochs": 10,
185
  "save_steps": 500,
 
195
  "attributes": {}
196
  }
197
  },
198
+ "total_flos": 1.565971061889024e+17,
199
  "train_batch_size": 32,
200
  "trial_name": null,
201
  "trial_params": null