error577 committed on
Commit
8615a13
·
verified ·
1 Parent(s): b8604b2

Training in progress, step 2000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f3c07d9f82d0dc1cd318f9c20b0b1529213bd8a4dff1faf8ce688ecb58571b31
3
  size 69527352
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04f0ffe434929d301fdbb4a477e60354c9ebe32f51f7b66c3b76af3239107135
3
  size 69527352
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8c6e3eb9856154789133b5419a7189f12fe843ca44f1cdff35733d6104dd62c3
3
  size 35778900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fdbfaab9c4b638c5be7b9b85b9ef2e77afadf2b9fc84a8c66872d06993947042
3
  size 35778900
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7d0b81074e1bc1527d9374bce86bdfa8d8c27dcb2ca9fd9115819f321defb1fa
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65a6bdf298e0592c7069487ab9f49a4212493c8c923f2cd738adc0345d5bd504
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0cf0b4281d39a881ce1af5e046e19d073ff2816738244a5102a35ebec9a11074
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff620fa731932bfad032c9b2869fcd08718601bc76c1b44cf37971591fc72fbd
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.3732219636440277,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-1850",
4
- "epoch": 0.2677559987642031,
5
  "eval_steps": 50,
6
- "global_step": 1950,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -13977,6 +13977,364 @@
13977
  "eval_samples_per_second": 13.321,
13978
  "eval_steps_per_second": 6.66,
13979
  "step": 1950
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13980
  }
13981
  ],
13982
  "logging_steps": 1,
@@ -13991,7 +14349,7 @@
13991
  "early_stopping_threshold": 0.0
13992
  },
13993
  "attributes": {
13994
- "early_stopping_patience_counter": 2
13995
  }
13996
  },
13997
  "TrainerControl": {
@@ -14000,12 +14358,12 @@
14000
  "should_evaluate": false,
14001
  "should_log": false,
14002
  "should_save": true,
14003
- "should_training_stop": false
14004
  },
14005
  "attributes": {}
14006
  }
14007
  },
14008
- "total_flos": 2.049408924844032e+16,
14009
  "train_batch_size": 2,
14010
  "trial_name": null,
14011
  "trial_params": null
 
1
  {
2
  "best_metric": 0.3732219636440277,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-1850",
4
+ "epoch": 0.27462153719405447,
5
  "eval_steps": 50,
6
+ "global_step": 2000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
13977
  "eval_samples_per_second": 13.321,
13978
  "eval_steps_per_second": 6.66,
13979
  "step": 1950
13980
+ },
13981
+ {
13982
+ "epoch": 0.2678933095328001,
13983
+ "grad_norm": 0.0776594951748848,
13984
+ "learning_rate": 2.990468147050729e-07,
13985
+ "loss": 0.315,
13986
+ "step": 1951
13987
+ },
13988
+ {
13989
+ "epoch": 0.2680306203013971,
13990
+ "grad_norm": 0.07933443784713745,
13991
+ "learning_rate": 2.869711549758014e-07,
13992
+ "loss": 0.3611,
13993
+ "step": 1952
13994
+ },
13995
+ {
13996
+ "epoch": 0.26816793106999415,
13997
+ "grad_norm": 0.07569830119609833,
13998
+ "learning_rate": 2.7514400612855815e-07,
13999
+ "loss": 0.3146,
14000
+ "step": 1953
14001
+ },
14002
+ {
14003
+ "epoch": 0.26830524183859117,
14004
+ "grad_norm": 0.08394397795200348,
14005
+ "learning_rate": 2.635653976396979e-07,
14006
+ "loss": 0.3673,
14007
+ "step": 1954
14008
+ },
14009
+ {
14010
+ "epoch": 0.26844255260718825,
14011
+ "grad_norm": 0.07972821593284607,
14012
+ "learning_rate": 2.522353583661263e-07,
14013
+ "loss": 0.358,
14014
+ "step": 1955
14015
+ },
14016
+ {
14017
+ "epoch": 0.26857986337578527,
14018
+ "grad_norm": 0.08286363631486893,
14019
+ "learning_rate": 2.4115391654524477e-07,
14020
+ "loss": 0.3535,
14021
+ "step": 1956
14022
+ },
14023
+ {
14024
+ "epoch": 0.2687171741443823,
14025
+ "grad_norm": 0.0780869573354721,
14026
+ "learning_rate": 2.303210997949168e-07,
14027
+ "loss": 0.3211,
14028
+ "step": 1957
14029
+ },
14030
+ {
14031
+ "epoch": 0.2688544849129793,
14032
+ "grad_norm": 0.0882067084312439,
14033
+ "learning_rate": 2.1973693511334604e-07,
14034
+ "loss": 0.339,
14035
+ "step": 1958
14036
+ },
14037
+ {
14038
+ "epoch": 0.26899179568157633,
14039
+ "grad_norm": 0.07985836267471313,
14040
+ "learning_rate": 2.094014488790097e-07,
14041
+ "loss": 0.3112,
14042
+ "step": 1959
14043
+ },
14044
+ {
14045
+ "epoch": 0.26912910645017335,
14046
+ "grad_norm": 0.09253552556037903,
14047
+ "learning_rate": 1.9931466685065847e-07,
14048
+ "loss": 0.3242,
14049
+ "step": 1960
14050
+ },
14051
+ {
14052
+ "epoch": 0.26926641721877037,
14053
+ "grad_norm": 0.08512873947620392,
14054
+ "learning_rate": 1.894766141671833e-07,
14055
+ "loss": 0.2778,
14056
+ "step": 1961
14057
+ },
14058
+ {
14059
+ "epoch": 0.2694037279873674,
14060
+ "grad_norm": 0.09439520537853241,
14061
+ "learning_rate": 1.7988731534757108e-07,
14062
+ "loss": 0.3481,
14063
+ "step": 1962
14064
+ },
14065
+ {
14066
+ "epoch": 0.2695410387559644,
14067
+ "grad_norm": 0.0956578180193901,
14068
+ "learning_rate": 1.7054679429086007e-07,
14069
+ "loss": 0.3387,
14070
+ "step": 1963
14071
+ },
14072
+ {
14073
+ "epoch": 0.2696783495245615,
14074
+ "grad_norm": 0.10661093145608902,
14075
+ "learning_rate": 1.6145507427606232e-07,
14076
+ "loss": 0.3995,
14077
+ "step": 1964
14078
+ },
14079
+ {
14080
+ "epoch": 0.2698156602931585,
14081
+ "grad_norm": 0.10120601952075958,
14082
+ "learning_rate": 1.5261217796211923e-07,
14083
+ "loss": 0.3887,
14084
+ "step": 1965
14085
+ },
14086
+ {
14087
+ "epoch": 0.26995297106175553,
14088
+ "grad_norm": 0.11400753259658813,
14089
+ "learning_rate": 1.4401812738782383e-07,
14090
+ "loss": 0.38,
14091
+ "step": 1966
14092
+ },
14093
+ {
14094
+ "epoch": 0.27009028183035255,
14095
+ "grad_norm": 0.1119614839553833,
14096
+ "learning_rate": 1.3567294397180964e-07,
14097
+ "loss": 0.3617,
14098
+ "step": 1967
14099
+ },
14100
+ {
14101
+ "epoch": 0.2702275925989496,
14102
+ "grad_norm": 0.1144985556602478,
14103
+ "learning_rate": 1.2757664851245078e-07,
14104
+ "loss": 0.3941,
14105
+ "step": 1968
14106
+ },
14107
+ {
14108
+ "epoch": 0.2703649033675466,
14109
+ "grad_norm": 0.12001071870326996,
14110
+ "learning_rate": 1.1972926118780647e-07,
14111
+ "loss": 0.3585,
14112
+ "step": 1969
14113
+ },
14114
+ {
14115
+ "epoch": 0.2705022141361436,
14116
+ "grad_norm": 0.10658982396125793,
14117
+ "learning_rate": 1.1213080155564326e-07,
14118
+ "loss": 0.3269,
14119
+ "step": 1970
14120
+ },
14121
+ {
14122
+ "epoch": 0.27063952490474064,
14123
+ "grad_norm": 0.1165112853050232,
14124
+ "learning_rate": 1.0478128855327952e-07,
14125
+ "loss": 0.4069,
14126
+ "step": 1971
14127
+ },
14128
+ {
14129
+ "epoch": 0.27077683567333766,
14130
+ "grad_norm": 0.12376672774553299,
14131
+ "learning_rate": 9.768074049762988e-08,
14132
+ "loss": 0.4226,
14133
+ "step": 1972
14134
+ },
14135
+ {
14136
+ "epoch": 0.27091414644193473,
14137
+ "grad_norm": 0.13154427707195282,
14138
+ "learning_rate": 9.082917508510536e-08,
14139
+ "loss": 0.4798,
14140
+ "step": 1973
14141
+ },
14142
+ {
14143
+ "epoch": 0.27105145721053175,
14144
+ "grad_norm": 0.1435077041387558,
14145
+ "learning_rate": 8.42266093916022e-08,
14146
+ "loss": 0.4423,
14147
+ "step": 1974
14148
+ },
14149
+ {
14150
+ "epoch": 0.2711887679791288,
14151
+ "grad_norm": 0.13770116865634918,
14152
+ "learning_rate": 7.787305987243532e-08,
14153
+ "loss": 0.3837,
14154
+ "step": 1975
14155
+ },
14156
+ {
14157
+ "epoch": 0.2713260787477258,
14158
+ "grad_norm": 0.11175678670406342,
14159
+ "learning_rate": 7.1768542362316e-08,
14160
+ "loss": 0.3307,
14161
+ "step": 1976
14162
+ },
14163
+ {
14164
+ "epoch": 0.2714633895163228,
14165
+ "grad_norm": 0.13395845890045166,
14166
+ "learning_rate": 6.591307207527431e-08,
14167
+ "loss": 0.3738,
14168
+ "step": 1977
14169
+ },
14170
+ {
14171
+ "epoch": 0.27160070028491984,
14172
+ "grad_norm": 0.1312989890575409,
14173
+ "learning_rate": 6.030666360469228e-08,
14174
+ "loss": 0.4427,
14175
+ "step": 1978
14176
+ },
14177
+ {
14178
+ "epoch": 0.27173801105351686,
14179
+ "grad_norm": 0.14491136372089386,
14180
+ "learning_rate": 5.494933092318189e-08,
14181
+ "loss": 0.4382,
14182
+ "step": 1979
14183
+ },
14184
+ {
14185
+ "epoch": 0.2718753218221139,
14186
+ "grad_norm": 0.12479250878095627,
14187
+ "learning_rate": 4.9841087382618276e-08,
14188
+ "loss": 0.3451,
14189
+ "step": 1980
14190
+ },
14191
+ {
14192
+ "epoch": 0.2720126325907109,
14193
+ "grad_norm": 0.12496712803840637,
14194
+ "learning_rate": 4.498194571409542e-08,
14195
+ "loss": 0.3951,
14196
+ "step": 1981
14197
+ },
14198
+ {
14199
+ "epoch": 0.272149943359308,
14200
+ "grad_norm": 0.1415296047925949,
14201
+ "learning_rate": 4.037191802783724e-08,
14202
+ "loss": 0.407,
14203
+ "step": 1982
14204
+ },
14205
+ {
14206
+ "epoch": 0.272287254127905,
14207
+ "grad_norm": 0.12588736414909363,
14208
+ "learning_rate": 3.6011015813253166e-08,
14209
+ "loss": 0.3288,
14210
+ "step": 1983
14211
+ },
14212
+ {
14213
+ "epoch": 0.272424564896502,
14214
+ "grad_norm": 0.1493213176727295,
14215
+ "learning_rate": 3.18992499388493e-08,
14216
+ "loss": 0.437,
14217
+ "step": 1984
14218
+ },
14219
+ {
14220
+ "epoch": 0.27256187566509904,
14221
+ "grad_norm": 0.15246812999248505,
14222
+ "learning_rate": 2.8036630652206187e-08,
14223
+ "loss": 0.3758,
14224
+ "step": 1985
14225
+ },
14226
+ {
14227
+ "epoch": 0.27269918643369606,
14228
+ "grad_norm": 0.15464863181114197,
14229
+ "learning_rate": 2.4423167579978868e-08,
14230
+ "loss": 0.4871,
14231
+ "step": 1986
14232
+ },
14233
+ {
14234
+ "epoch": 0.2728364972022931,
14235
+ "grad_norm": 0.15227794647216797,
14236
+ "learning_rate": 2.105886972787463e-08,
14237
+ "loss": 0.4728,
14238
+ "step": 1987
14239
+ },
14240
+ {
14241
+ "epoch": 0.2729738079708901,
14242
+ "grad_norm": 0.15416234731674194,
14243
+ "learning_rate": 1.7943745480586417e-08,
14244
+ "loss": 0.4987,
14245
+ "step": 1988
14246
+ },
14247
+ {
14248
+ "epoch": 0.2731111187394871,
14249
+ "grad_norm": 0.15620464086532593,
14250
+ "learning_rate": 1.5077802601826118e-08,
14251
+ "loss": 0.4204,
14252
+ "step": 1989
14253
+ },
14254
+ {
14255
+ "epoch": 0.27324842950808415,
14256
+ "grad_norm": 0.14840669929981232,
14257
+ "learning_rate": 1.2461048234269079e-08,
14258
+ "loss": 0.3661,
14259
+ "step": 1990
14260
+ },
14261
+ {
14262
+ "epoch": 0.2733857402766812,
14263
+ "grad_norm": 0.1491561383008957,
14264
+ "learning_rate": 1.0093488899554082e-08,
14265
+ "loss": 0.3791,
14266
+ "step": 1991
14267
+ },
14268
+ {
14269
+ "epoch": 0.27352305104527824,
14270
+ "grad_norm": 0.13692405819892883,
14271
+ "learning_rate": 7.975130498238948e-09,
14272
+ "loss": 0.3049,
14273
+ "step": 1992
14274
+ },
14275
+ {
14276
+ "epoch": 0.27366036181387526,
14277
+ "grad_norm": 0.19245244562625885,
14278
+ "learning_rate": 6.105978309856042e-09,
14279
+ "loss": 0.4476,
14280
+ "step": 1993
14281
+ },
14282
+ {
14283
+ "epoch": 0.2737976725824723,
14284
+ "grad_norm": 0.19619667530059814,
14285
+ "learning_rate": 4.486036992790155e-09,
14286
+ "loss": 0.4232,
14287
+ "step": 1994
14288
+ },
14289
+ {
14290
+ "epoch": 0.2739349833510693,
14291
+ "grad_norm": 0.1624627262353897,
14292
+ "learning_rate": 3.115310584367315e-09,
14293
+ "loss": 0.3799,
14294
+ "step": 1995
14295
+ },
14296
+ {
14297
+ "epoch": 0.2740722941196663,
14298
+ "grad_norm": 0.14835092425346375,
14299
+ "learning_rate": 1.99380250079928e-09,
14300
+ "loss": 0.2921,
14301
+ "step": 1996
14302
+ },
14303
+ {
14304
+ "epoch": 0.27420960488826335,
14305
+ "grad_norm": 0.16958799958229065,
14306
+ "learning_rate": 1.1215155371835373e-09,
14307
+ "loss": 0.3982,
14308
+ "step": 1997
14309
+ },
14310
+ {
14311
+ "epoch": 0.27434691565686037,
14312
+ "grad_norm": 0.17734107375144958,
14313
+ "learning_rate": 4.984518674699956e-10,
14314
+ "loss": 0.415,
14315
+ "step": 1998
14316
+ },
14317
+ {
14318
+ "epoch": 0.2744842264254574,
14319
+ "grad_norm": 0.18290874361991882,
14320
+ "learning_rate": 1.2461304450539502e-10,
14321
+ "loss": 0.4189,
14322
+ "step": 1999
14323
+ },
14324
+ {
14325
+ "epoch": 0.27462153719405447,
14326
+ "grad_norm": 0.18318958580493927,
14327
+ "learning_rate": 0.0,
14328
+ "loss": 0.3441,
14329
+ "step": 2000
14330
+ },
14331
+ {
14332
+ "epoch": 0.27462153719405447,
14333
+ "eval_loss": 0.37363573908805847,
14334
+ "eval_runtime": 89.1948,
14335
+ "eval_samples_per_second": 13.342,
14336
+ "eval_steps_per_second": 6.671,
14337
+ "step": 2000
14338
  }
14339
  ],
14340
  "logging_steps": 1,
 
14349
  "early_stopping_threshold": 0.0
14350
  },
14351
  "attributes": {
14352
+ "early_stopping_patience_counter": 3
14353
  }
14354
  },
14355
  "TrainerControl": {
 
14358
  "should_evaluate": false,
14359
  "should_log": false,
14360
  "should_save": true,
14361
+ "should_training_stop": true
14362
  },
14363
  "attributes": {}
14364
  }
14365
  },
14366
+ "total_flos": 2.09999650455552e+16,
14367
  "train_batch_size": 2,
14368
  "trial_name": null,
14369
  "trial_params": null