nblinh63 committed on
Commit
f8d7c05
·
verified ·
1 Parent(s): 26dec1a

End of training

Browse files
README.md CHANGED
@@ -104,7 +104,7 @@ xformers_attention: null
104
 
105
  This model is a fine-tuned version of [EleutherAI/pythia-14m](https://huggingface.co/EleutherAI/pythia-14m) on the None dataset.
106
  It achieves the following results on the evaluation set:
107
- - Loss: 7.4194
108
 
109
  ## Model description
110
 
@@ -139,9 +139,9 @@ The following hyperparameters were used during training:
139
  | Training Loss | Epoch | Step | Validation Loss |
140
  |:-------------:|:------:|:----:|:---------------:|
141
  | 42.0391 | 0.0013 | 1 | 10.8013 |
142
- | 40.9297 | 0.0040 | 3 | 10.4305 |
143
- | 39.2031 | 0.0079 | 6 | 8.9850 |
144
- | 30.0859 | 0.0119 | 9 | 7.4194 |
145
 
146
 
147
  ### Framework versions
 
104
 
105
  This model is a fine-tuned version of [EleutherAI/pythia-14m](https://huggingface.co/EleutherAI/pythia-14m) on the None dataset.
106
  It achieves the following results on the evaluation set:
107
+ - Loss: 7.5607
108
 
109
  ## Model description
110
 
 
139
  | Training Loss | Epoch | Step | Validation Loss |
140
  |:-------------:|:------:|:----:|:---------------:|
141
  | 42.0391 | 0.0013 | 1 | 10.8013 |
142
+ | 42.5078 | 0.0040 | 3 | 10.4134 |
143
+ | 44.0547 | 0.0079 | 6 | 9.3822 |
144
+ | 30.1562 | 0.0119 | 9 | 7.5607 |
145
 
146
 
147
  ### Framework versions
adapter_config.json CHANGED
@@ -20,10 +20,10 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
 
23
  "dense",
24
- "query_key_value",
25
  "dense_4h_to_h",
26
- "dense_h_to_4h"
27
  ],
28
  "task_type": "CAUSAL_LM",
29
  "use_dora": false,
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
+ "dense_h_to_4h",
24
  "dense",
 
25
  "dense_4h_to_h",
26
+ "query_key_value"
27
  ],
28
  "task_type": "CAUSAL_LM",
29
  "use_dora": false,
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:36283aa446e5062a903eb43b2ce1ab305aa3809cfbbcbd93dc0d8690076b0448
3
  size 804030
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b60b7aad1f1f52839a373900df7cc27805d8e84caf057397c23b5491d74dbd39
3
  size 804030
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2cdd41b8034aebed5f4cfddad21d3fd7abe661ba76b481044ea408ec6239fa06
3
  size 792912
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55047a50e41be0e4e6dd996b8c2ea294dd61c14b8f6b1fe8713482713f5c79f2
3
  size 792912
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:097bd66dc3d47f5850103823e5f488e05d11aad4916f7f9c6e0b797fc472f3b8
3
  size 6776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a8fc328665b58033bd97cc1ce9c1a9a184f8df499d10df35acdb35b8dc41988
3
  size 6776