Upload folder using huggingface_hub
Browse files- config.yaml +41 -0
- metrics.json +174 -0
- model_1000.pt +3 -0
- model_2000.pt +3 -0
- model_3000.pt +3 -0
- model_4000.pt +3 -0
- model_5000.pt +3 -0
- model_6000.pt +3 -0
- model_7000.pt +3 -0
- state.pt +3 -0
config.yaml
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
config: glm_config.yaml
|
| 2 |
+
model:
|
| 3 |
+
family: gpt2
|
| 4 |
+
n_dims: 10
|
| 5 |
+
n_embd: 256
|
| 6 |
+
n_head: 8
|
| 7 |
+
n_layer: 12
|
| 8 |
+
n_positions: 101
|
| 9 |
+
out_dir: /home/derixu/Documents/in-context-learning-GLM/glm_weights/08229e17-10ab-46c3-b789-fd94c0d91778
|
| 10 |
+
test_run: false
|
| 11 |
+
training:
|
| 12 |
+
batch_size: 64
|
| 13 |
+
curriculum:
|
| 14 |
+
dims:
|
| 15 |
+
end: 10
|
| 16 |
+
inc: 0
|
| 17 |
+
interval: 10000
|
| 18 |
+
start: 10
|
| 19 |
+
points:
|
| 20 |
+
end: 40
|
| 21 |
+
inc: 0
|
| 22 |
+
interval: 10000
|
| 23 |
+
start: 40
|
| 24 |
+
data: gaussian
|
| 25 |
+
keep_every_steps: 1000
|
| 26 |
+
learning_rate: 0.00025
|
| 27 |
+
num_tasks: null
|
| 28 |
+
num_training_examples: null
|
| 29 |
+
resume_id: null
|
| 30 |
+
save_every_steps: 1000
|
| 31 |
+
task: GLM
|
| 32 |
+
task_kwargs:
|
| 33 |
+
function_type: logistic
|
| 34 |
+
scaling: 0.32
|
| 35 |
+
train_steps: 8000
|
| 36 |
+
wandb:
|
| 37 |
+
entity: derryxu
|
| 38 |
+
log_every_steps: 100
|
| 39 |
+
name: null
|
| 40 |
+
notes: ICL GLM training
|
| 41 |
+
project: in-context-training
|
metrics.json
ADDED
|
@@ -0,0 +1,174 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"standard": {
|
| 3 |
+
"gpt2_embd=256_layer=12_head=8": {
|
| 4 |
+
"mean": [
|
| 5 |
+
451.74542236328125,
|
| 6 |
+
446.76708984375,
|
| 7 |
+
398.19073486328125,
|
| 8 |
+
375.18438720703125,
|
| 9 |
+
371.9727783203125,
|
| 10 |
+
413.12603759765625,
|
| 11 |
+
421.4949645996094,
|
| 12 |
+
334.7201232910156,
|
| 13 |
+
354.1137390136719,
|
| 14 |
+
427.40435791015625,
|
| 15 |
+
394.4300231933594,
|
| 16 |
+
377.80303955078125,
|
| 17 |
+
356.6797790527344,
|
| 18 |
+
416.15179443359375,
|
| 19 |
+
386.86138916015625,
|
| 20 |
+
401.1386413574219,
|
| 21 |
+
436.0201110839844,
|
| 22 |
+
376.4483337402344,
|
| 23 |
+
387.7586669921875,
|
| 24 |
+
399.2389831542969,
|
| 25 |
+
368.8802795410156,
|
| 26 |
+
408.1917419433594,
|
| 27 |
+
403.31268310546875,
|
| 28 |
+
366.3211975097656,
|
| 29 |
+
377.75860595703125,
|
| 30 |
+
370.9283752441406,
|
| 31 |
+
342.18292236328125,
|
| 32 |
+
399.34967041015625,
|
| 33 |
+
380.167724609375,
|
| 34 |
+
397.1018371582031,
|
| 35 |
+
369.1687927246094,
|
| 36 |
+
395.45501708984375,
|
| 37 |
+
409.96368408203125,
|
| 38 |
+
399.93292236328125,
|
| 39 |
+
388.31463623046875,
|
| 40 |
+
391.57196044921875,
|
| 41 |
+
399.65045166015625,
|
| 42 |
+
397.49652099609375,
|
| 43 |
+
379.173095703125,
|
| 44 |
+
391.5447692871094
|
| 45 |
+
],
|
| 46 |
+
"std": [
|
| 47 |
+
1036.1168212890625,
|
| 48 |
+
1052.5504150390625,
|
| 49 |
+
985.4584350585938,
|
| 50 |
+
944.4644165039062,
|
| 51 |
+
925.74853515625,
|
| 52 |
+
1008.0751953125,
|
| 53 |
+
1011.539306640625,
|
| 54 |
+
893.5997924804688,
|
| 55 |
+
901.9557495117188,
|
| 56 |
+
1032.4256591796875,
|
| 57 |
+
973.437744140625,
|
| 58 |
+
951.7726440429688,
|
| 59 |
+
938.1434326171875,
|
| 60 |
+
1028.182373046875,
|
| 61 |
+
975.9642944335938,
|
| 62 |
+
983.3131713867188,
|
| 63 |
+
996.96728515625,
|
| 64 |
+
967.6622314453125,
|
| 65 |
+
941.3789672851562,
|
| 66 |
+
995.826171875,
|
| 67 |
+
954.170166015625,
|
| 68 |
+
979.0750732421875,
|
| 69 |
+
948.0242309570312,
|
| 70 |
+
947.281494140625,
|
| 71 |
+
971.1467895507812,
|
| 72 |
+
953.8440551757812,
|
| 73 |
+
885.4946899414062,
|
| 74 |
+
983.6041259765625,
|
| 75 |
+
982.82958984375,
|
| 76 |
+
1004.1043090820312,
|
| 77 |
+
944.0158081054688,
|
| 78 |
+
955.7814331054688,
|
| 79 |
+
995.3485107421875,
|
| 80 |
+
974.4137573242188,
|
| 81 |
+
966.8414306640625,
|
| 82 |
+
966.6190185546875,
|
| 83 |
+
977.1292724609375,
|
| 84 |
+
961.6306762695312,
|
| 85 |
+
959.372802734375,
|
| 86 |
+
966.71142578125
|
| 87 |
+
],
|
| 88 |
+
"bootstrap_low": [
|
| 89 |
+
405.491943359375,
|
| 90 |
+
397.20330810546875,
|
| 91 |
+
353.11041259765625,
|
| 92 |
+
330.30621337890625,
|
| 93 |
+
328.72308349609375,
|
| 94 |
+
366.9914855957031,
|
| 95 |
+
373.15863037109375,
|
| 96 |
+
295.7124938964844,
|
| 97 |
+
314.08428955078125,
|
| 98 |
+
380.3370056152344,
|
| 99 |
+
351.295166015625,
|
| 100 |
+
334.26165771484375,
|
| 101 |
+
317.1861267089844,
|
| 102 |
+
373.317138671875,
|
| 103 |
+
341.4490966796875,
|
| 104 |
+
357.5613708496094,
|
| 105 |
+
388.9658203125,
|
| 106 |
+
334.51727294921875,
|
| 107 |
+
345.2065734863281,
|
| 108 |
+
352.8218994140625,
|
| 109 |
+
327.8773498535156,
|
| 110 |
+
363.7894287109375,
|
| 111 |
+
358.8099060058594,
|
| 112 |
+
323.10565185546875,
|
| 113 |
+
330.771240234375,
|
| 114 |
+
330.10540771484375,
|
| 115 |
+
302.78564453125,
|
| 116 |
+
354.44915771484375,
|
| 117 |
+
336.779052734375,
|
| 118 |
+
355.08282470703125,
|
| 119 |
+
326.2351989746094,
|
| 120 |
+
350.0144348144531,
|
| 121 |
+
365.2312927246094,
|
| 122 |
+
353.1064758300781,
|
| 123 |
+
345.2763366699219,
|
| 124 |
+
351.8453674316406,
|
| 125 |
+
354.5221862792969,
|
| 126 |
+
353.1282958984375,
|
| 127 |
+
336.75628662109375,
|
| 128 |
+
347.83343505859375
|
| 129 |
+
],
|
| 130 |
+
"bootstrap_high": [
|
| 131 |
+
498.749755859375,
|
| 132 |
+
497.474609375,
|
| 133 |
+
447.17474365234375,
|
| 134 |
+
419.853759765625,
|
| 135 |
+
414.72509765625,
|
| 136 |
+
461.81640625,
|
| 137 |
+
471.97161865234375,
|
| 138 |
+
376.7627868652344,
|
| 139 |
+
396.3875732421875,
|
| 140 |
+
477.4115295410156,
|
| 141 |
+
441.86871337890625,
|
| 142 |
+
419.2454528808594,
|
| 143 |
+
400.7157287597656,
|
| 144 |
+
465.502197265625,
|
| 145 |
+
431.4187927246094,
|
| 146 |
+
446.68719482421875,
|
| 147 |
+
485.0738830566406,
|
| 148 |
+
423.566650390625,
|
| 149 |
+
432.2622985839844,
|
| 150 |
+
444.14569091796875,
|
| 151 |
+
413.220458984375,
|
| 152 |
+
452.36328125,
|
| 153 |
+
450.22540283203125,
|
| 154 |
+
413.99169921875,
|
| 155 |
+
419.6934509277344,
|
| 156 |
+
414.348876953125,
|
| 157 |
+
382.47967529296875,
|
| 158 |
+
444.3714904785156,
|
| 159 |
+
429.7515563964844,
|
| 160 |
+
441.1724548339844,
|
| 161 |
+
414.6210021972656,
|
| 162 |
+
438.0732421875,
|
| 163 |
+
456.46710205078125,
|
| 164 |
+
448.4256896972656,
|
| 165 |
+
435.58245849609375,
|
| 166 |
+
437.2386779785156,
|
| 167 |
+
444.68548583984375,
|
| 168 |
+
443.985595703125,
|
| 169 |
+
423.1942443847656,
|
| 170 |
+
438.37725830078125
|
| 171 |
+
]
|
| 172 |
+
}
|
| 173 |
+
}
|
| 174 |
+
}
|
model_1000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f8d600a37cddf7274e4c8b29a6a83698dfc53b99cca873a3d08673b553ebf8ab
|
| 3 |
+
size 90145655
|
model_2000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0be812bbfdea829142063c8dc12103eb88307ba8b6b7e5444c576a1f3b00302c
|
| 3 |
+
size 90145655
|
model_3000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1fbcc9f206b330f2bd03343b0754eae6eb0ae3e9110b556dcc7e47e3ac1a2226
|
| 3 |
+
size 90145655
|
model_4000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9d0c3277ce518c7d787e4a4108a30ec002a22eb7394cb841758cbda57ade47bf
|
| 3 |
+
size 90145655
|
model_5000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a6f37999be6d916a388d4b13458c6637e24a65ff109629a3cbe60d1298761879
|
| 3 |
+
size 90145655
|
model_6000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:64ad9d67618f31a060aed5e43dd674da6fa0f33b4fb684c15b85944cdc22ede5
|
| 3 |
+
size 90145655
|
model_7000.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:73597c7801accff76132c7e660adfdd039878f6296367e168ea0684e3e66c2e9
|
| 3 |
+
size 90145655
|
state.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f98a8aaf9167eefbe1b62b0b75e29235791106c021e423de0bbdc93bdbc16c0b
|
| 3 |
+
size 166501481
|