duyphu committed
Commit b6f254d · verified · 1 Parent(s): fb0a26c

Training in progress, step 50, checkpoint

last-checkpoint/adapter_config.json CHANGED
@@ -20,13 +20,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "o_proj",
     "k_proj",
     "v_proj",
-    "gate_proj",
     "up_proj",
     "q_proj",
-    "down_proj"
+    "down_proj",
+    "o_proj",
+    "gate_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7c13de3c1483e463dcd12c72778f3619e45b9253c69f2bbf9face7570f03df41
+oid sha256:777e4ffbadc32e8e08cca6ab98a9f54f3a55918daf1b12ded0456e16f65b2aa6
 size 50899792
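This entry (like the other binary files below) is a Git LFS pointer: the oid is the SHA-256 of the actual blob and size is its byte count. A minimal integrity check after downloading the real file (local path is hypothetical):

    import hashlib

    # Compare the downloaded blob's SHA-256 against the pointer's oid.
    with open("last-checkpoint/adapter_model.safetensors", "rb") as f:
        digest = hashlib.sha256(f.read()).hexdigest()
    assert digest == "777e4ffbadc32e8e08cca6ab98a9f54f3a55918daf1b12ded0456e16f65b2aa6"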
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d6dc7d56ed8f766d520b3ecadd05407f3f0162827a39ff31479b0a235ba659a4
+oid sha256:7e1716c110b05ac3eebd4ca8a6b9587057eece9e57ca2f143e0a3e1b21dd5d97
 size 26231300
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a58445dfacae6d3a5c41737710794c0d202d0797b11394bf49fb53aff4510e07
+oid sha256:31bdb696b574973895ff2b747377d3ffb8c746f630155b864dae9e45560ab2da
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:37841e69eda911caeb33edeefa0b2f140e72dcce247aeb757b2fe89c00d7887b
+oid sha256:b1df0528620c07325b8faa7567e59b0c1e86a1f1ee6af1245a69c6c0463fe4e2
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.0004220077016405549,
+  "epoch": 0.0008115532723856826,
   "eval_steps": 10,
-  "global_step": 26,
+  "global_step": 50,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -11,61 +11,120 @@
     {
       "epoch": 1.6231065447713652e-05,
       "eval_loss": 1.1903656721115112,
-      "eval_runtime": 1380.2137,
-      "eval_samples_per_second": 18.796,
+      "eval_runtime": 1380.1159,
+      "eval_samples_per_second": 18.797,
       "eval_steps_per_second": 9.398,
       "step": 1
     },
     {
       "epoch": 8.115532723856825e-05,
-      "grad_norm": 0.33062881231307983,
+      "grad_norm": 0.3520669639110565,
       "learning_rate": 5e-05,
-      "loss": 0.9504,
+      "loss": 0.9505,
       "step": 5
     },
     {
       "epoch": 0.0001623106544771365,
-      "grad_norm": 0.24923232197761536,
+      "grad_norm": 0.2661932110786438,
       "learning_rate": 0.0001,
-      "loss": 1.2069,
+      "loss": 1.2073,
       "step": 10
     },
     {
       "epoch": 0.0001623106544771365,
-      "eval_loss": 1.188248872756958,
-      "eval_runtime": 1385.4083,
-      "eval_samples_per_second": 18.725,
-      "eval_steps_per_second": 9.363,
+      "eval_loss": 1.1880683898925781,
+      "eval_runtime": 1378.339,
+      "eval_samples_per_second": 18.821,
+      "eval_steps_per_second": 9.411,
       "step": 10
     },
     {
       "epoch": 0.00024346598171570477,
-      "grad_norm": 0.22104988992214203,
+      "grad_norm": 0.2401016354560852,
       "learning_rate": 9.619397662556435e-05,
-      "loss": 1.0095,
+      "loss": 1.0093,
       "step": 15
     },
     {
       "epoch": 0.000324621308954273,
-      "grad_norm": 0.30212002992630005,
+      "grad_norm": 0.31756964325904846,
       "learning_rate": 8.535533905932738e-05,
-      "loss": 1.2259,
+      "loss": 1.226,
       "step": 20
     },
     {
       "epoch": 0.000324621308954273,
-      "eval_loss": 1.172216773033142,
-      "eval_runtime": 1381.6685,
-      "eval_samples_per_second": 18.776,
-      "eval_steps_per_second": 9.388,
+      "eval_loss": 1.172145128250122,
+      "eval_runtime": 1379.8373,
+      "eval_samples_per_second": 18.801,
+      "eval_steps_per_second": 9.4,
       "step": 20
     },
     {
       "epoch": 0.0004057766361928413,
-      "grad_norm": 0.2586546540260315,
+      "grad_norm": 0.275453120470047,
       "learning_rate": 6.91341716182545e-05,
-      "loss": 1.3021,
+      "loss": 1.3019,
       "step": 25
+    },
+    {
+      "epoch": 0.00048693196343140954,
+      "grad_norm": 0.31876248121261597,
+      "learning_rate": 5e-05,
+      "loss": 0.9136,
+      "step": 30
+    },
+    {
+      "epoch": 0.00048693196343140954,
+      "eval_loss": 1.1538145542144775,
+      "eval_runtime": 1380.8853,
+      "eval_samples_per_second": 18.786,
+      "eval_steps_per_second": 9.393,
+      "step": 30
+    },
+    {
+      "epoch": 0.0005680872906699778,
+      "grad_norm": 0.29820069670677185,
+      "learning_rate": 3.086582838174551e-05,
+      "loss": 1.0017,
+      "step": 35
+    },
+    {
+      "epoch": 0.000649242617908546,
+      "grad_norm": 0.42904895544052124,
+      "learning_rate": 1.4644660940672627e-05,
+      "loss": 1.1429,
+      "step": 40
+    },
+    {
+      "epoch": 0.000649242617908546,
+      "eval_loss": 1.1452189683914185,
+      "eval_runtime": 1380.8707,
+      "eval_samples_per_second": 18.787,
+      "eval_steps_per_second": 9.393,
+      "step": 40
+    },
+    {
+      "epoch": 0.0007303979451471144,
+      "grad_norm": 0.25957685708999634,
+      "learning_rate": 3.8060233744356633e-06,
+      "loss": 1.1583,
+      "step": 45
+    },
+    {
+      "epoch": 0.0008115532723856826,
+      "grad_norm": 0.2517766058444977,
+      "learning_rate": 0.0,
+      "loss": 0.9416,
+      "step": 50
+    },
+    {
+      "epoch": 0.0008115532723856826,
+      "eval_loss": 1.1437370777130127,
+      "eval_runtime": 1383.7319,
+      "eval_samples_per_second": 18.748,
+      "eval_steps_per_second": 9.374,
+      "step": 50
     }
   ],
   "logging_steps": 5,
@@ -80,12 +139,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 3362199526440960.0,
+  "total_flos": 6396379586887680.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9cbc4585b463e9d7165ff7e593c25639eb96b872978fc7f1fc73ec0952b925fa
+oid sha256:34467d464a1014f734add1fd005384274b5eb325f890f9c8d872bab038d4721a
 size 6776