Training in progress, step 2000, checkpoint
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:04f0ffe434929d301fdbb4a477e60354c9ebe32f51f7b66c3b76af3239107135
 size 69527352
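
These binaries are stored through Git LFS, so the diff only touches the three-line pointer file: spec version, sha256 object id, and byte size. The old oid is truncated in this page's rendering, and the size (69527352 bytes) is unchanged, which is consistent with the adapter being rewritten at the same shape. A minimal sketch of verifying a downloaded copy against the new pointer's oid; the local path is an assumption:

# Minimal sketch: check a downloaded LFS file against the pointer's oid.
# The path assumes the checkpoint directory was fetched locally.
import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

expected = "04f0ffe434929d301fdbb4a477e60354c9ebe32f51f7b66c3b76af3239107135"
assert sha256_of("last-checkpoint/adapter_model.safetensors") == expected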
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:fdbfaab9c4b638c5be7b9b85b9ef2e77afadf2b9fc84a8c66872d06993947042
 size 35778900
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:65a6bdf298e0592c7069487ab9f49a4212493c8c923f2cd738adc0345d5bd504
 size 14244
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:ff620fa731932bfad032c9b2869fcd08718601bc76c1b44cf37971591fc72fbd
 size 1064
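
The three companion files hold the optimizer moments, the RNG snapshots, and the LR-scheduler state that make this checkpoint exactly resumable; with transformers, Trainer.train(resume_from_checkpoint="last-checkpoint") restores all of them. A minimal sketch of inspecting the files directly; the paths are assumptions, and weights_only=False is needed because rng_state.pth stores non-tensor objects (only do this for files you trust):

# Minimal sketch: peek at the resumable-state files with PyTorch.
import torch

opt = torch.load("last-checkpoint/optimizer.pt",
                 map_location="cpu", weights_only=False)
rng = torch.load("last-checkpoint/rng_state.pth",
                 map_location="cpu", weights_only=False)
sched = torch.load("last-checkpoint/scheduler.pt",
                   map_location="cpu", weights_only=False)

print(sorted(opt.keys()))    # typically ['param_groups', 'state']
print(sorted(rng.keys()))    # typically cpu/cuda/numpy/python RNG snapshots
print(sched)                 # scheduler state dict (only 1064 bytes here)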
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 0.3732219636440277,
   "best_model_checkpoint": "miner_id_24/checkpoint-1850",
-  "epoch": 0.
+  "epoch": 0.27462153719405447,
   "eval_steps": 50,
-  "global_step":
+  "global_step": 2000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
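
The header now pins the run at global_step 2000 and epoch 0.27462153719405447 (the pre-change values were truncated by the page and are left as-is above). Dataset size and gradient accumulation are not recorded in this file, but their combined effect falls out of the ratio of the two fields. A one-line check:

# Minimal sketch: steps-per-epoch implied by the header fields above.
epoch, global_step = 0.27462153719405447, 2000
print(global_step / epoch)  # ~7282.75 optimizer steps per full epoch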
@@ -13977,6 +13977,364 @@
       "eval_samples_per_second": 13.321,
       "eval_steps_per_second": 6.66,
       "step": 1950
+    },
+    {
+      "epoch": 0.2678933095328001,
+      "grad_norm": 0.0776594951748848,
+      "learning_rate": 2.990468147050729e-07,
+      "loss": 0.315,
+      "step": 1951
+    },
+    {
+      "epoch": 0.2680306203013971,
+      "grad_norm": 0.07933443784713745,
+      "learning_rate": 2.869711549758014e-07,
+      "loss": 0.3611,
+      "step": 1952
+    },
+    {
+      "epoch": 0.26816793106999415,
+      "grad_norm": 0.07569830119609833,
+      "learning_rate": 2.7514400612855815e-07,
+      "loss": 0.3146,
+      "step": 1953
+    },
+    {
+      "epoch": 0.26830524183859117,
+      "grad_norm": 0.08394397795200348,
+      "learning_rate": 2.635653976396979e-07,
+      "loss": 0.3673,
+      "step": 1954
+    },
+    {
+      "epoch": 0.26844255260718825,
+      "grad_norm": 0.07972821593284607,
+      "learning_rate": 2.522353583661263e-07,
+      "loss": 0.358,
+      "step": 1955
+    },
+    {
+      "epoch": 0.26857986337578527,
+      "grad_norm": 0.08286363631486893,
+      "learning_rate": 2.4115391654524477e-07,
+      "loss": 0.3535,
+      "step": 1956
+    },
+    {
+      "epoch": 0.2687171741443823,
+      "grad_norm": 0.0780869573354721,
+      "learning_rate": 2.303210997949168e-07,
+      "loss": 0.3211,
+      "step": 1957
+    },
+    {
+      "epoch": 0.2688544849129793,
+      "grad_norm": 0.0882067084312439,
+      "learning_rate": 2.1973693511334604e-07,
+      "loss": 0.339,
+      "step": 1958
+    },
+    {
+      "epoch": 0.26899179568157633,
+      "grad_norm": 0.07985836267471313,
+      "learning_rate": 2.094014488790097e-07,
+      "loss": 0.3112,
+      "step": 1959
+    },
+    {
+      "epoch": 0.26912910645017335,
+      "grad_norm": 0.09253552556037903,
+      "learning_rate": 1.9931466685065847e-07,
+      "loss": 0.3242,
+      "step": 1960
+    },
+    {
+      "epoch": 0.26926641721877037,
+      "grad_norm": 0.08512873947620392,
+      "learning_rate": 1.894766141671833e-07,
+      "loss": 0.2778,
+      "step": 1961
+    },
+    {
+      "epoch": 0.2694037279873674,
+      "grad_norm": 0.09439520537853241,
+      "learning_rate": 1.7988731534757108e-07,
+      "loss": 0.3481,
+      "step": 1962
+    },
+    {
+      "epoch": 0.2695410387559644,
+      "grad_norm": 0.0956578180193901,
+      "learning_rate": 1.7054679429086007e-07,
+      "loss": 0.3387,
+      "step": 1963
+    },
+    {
+      "epoch": 0.2696783495245615,
+      "grad_norm": 0.10661093145608902,
+      "learning_rate": 1.6145507427606232e-07,
+      "loss": 0.3995,
+      "step": 1964
+    },
+    {
+      "epoch": 0.2698156602931585,
+      "grad_norm": 0.10120601952075958,
+      "learning_rate": 1.5261217796211923e-07,
+      "loss": 0.3887,
+      "step": 1965
+    },
+    {
+      "epoch": 0.26995297106175553,
+      "grad_norm": 0.11400753259658813,
+      "learning_rate": 1.4401812738782383e-07,
+      "loss": 0.38,
+      "step": 1966
+    },
+    {
+      "epoch": 0.27009028183035255,
+      "grad_norm": 0.1119614839553833,
+      "learning_rate": 1.3567294397180964e-07,
+      "loss": 0.3617,
+      "step": 1967
+    },
+    {
+      "epoch": 0.2702275925989496,
+      "grad_norm": 0.1144985556602478,
+      "learning_rate": 1.2757664851245078e-07,
+      "loss": 0.3941,
+      "step": 1968
+    },
+    {
+      "epoch": 0.2703649033675466,
+      "grad_norm": 0.12001071870326996,
+      "learning_rate": 1.1972926118780647e-07,
+      "loss": 0.3585,
+      "step": 1969
+    },
+    {
+      "epoch": 0.2705022141361436,
+      "grad_norm": 0.10658982396125793,
+      "learning_rate": 1.1213080155564326e-07,
+      "loss": 0.3269,
+      "step": 1970
+    },
+    {
+      "epoch": 0.27063952490474064,
+      "grad_norm": 0.1165112853050232,
+      "learning_rate": 1.0478128855327952e-07,
+      "loss": 0.4069,
+      "step": 1971
+    },
+    {
+      "epoch": 0.27077683567333766,
+      "grad_norm": 0.12376672774553299,
+      "learning_rate": 9.768074049762988e-08,
+      "loss": 0.4226,
+      "step": 1972
+    },
+    {
+      "epoch": 0.27091414644193473,
+      "grad_norm": 0.13154427707195282,
+      "learning_rate": 9.082917508510536e-08,
+      "loss": 0.4798,
+      "step": 1973
+    },
+    {
+      "epoch": 0.27105145721053175,
+      "grad_norm": 0.1435077041387558,
+      "learning_rate": 8.42266093916022e-08,
+      "loss": 0.4423,
+      "step": 1974
+    },
+    {
+      "epoch": 0.2711887679791288,
+      "grad_norm": 0.13770116865634918,
+      "learning_rate": 7.787305987243532e-08,
+      "loss": 0.3837,
+      "step": 1975
+    },
+    {
+      "epoch": 0.2713260787477258,
+      "grad_norm": 0.11175678670406342,
+      "learning_rate": 7.1768542362316e-08,
+      "loss": 0.3307,
+      "step": 1976
+    },
+    {
+      "epoch": 0.2714633895163228,
+      "grad_norm": 0.13395845890045166,
+      "learning_rate": 6.591307207527431e-08,
+      "loss": 0.3738,
+      "step": 1977
+    },
+    {
+      "epoch": 0.27160070028491984,
+      "grad_norm": 0.1312989890575409,
+      "learning_rate": 6.030666360469228e-08,
+      "loss": 0.4427,
+      "step": 1978
+    },
+    {
+      "epoch": 0.27173801105351686,
+      "grad_norm": 0.14491136372089386,
+      "learning_rate": 5.494933092318189e-08,
+      "loss": 0.4382,
+      "step": 1979
+    },
+    {
+      "epoch": 0.2718753218221139,
+      "grad_norm": 0.12479250878095627,
+      "learning_rate": 4.9841087382618276e-08,
+      "loss": 0.3451,
+      "step": 1980
+    },
+    {
+      "epoch": 0.2720126325907109,
+      "grad_norm": 0.12496712803840637,
+      "learning_rate": 4.498194571409542e-08,
+      "loss": 0.3951,
+      "step": 1981
+    },
+    {
+      "epoch": 0.272149943359308,
+      "grad_norm": 0.1415296047925949,
+      "learning_rate": 4.037191802783724e-08,
+      "loss": 0.407,
+      "step": 1982
+    },
+    {
+      "epoch": 0.272287254127905,
+      "grad_norm": 0.12588736414909363,
+      "learning_rate": 3.6011015813253166e-08,
+      "loss": 0.3288,
+      "step": 1983
+    },
+    {
+      "epoch": 0.272424564896502,
+      "grad_norm": 0.1493213176727295,
+      "learning_rate": 3.18992499388493e-08,
+      "loss": 0.437,
+      "step": 1984
+    },
+    {
+      "epoch": 0.27256187566509904,
+      "grad_norm": 0.15246812999248505,
+      "learning_rate": 2.8036630652206187e-08,
+      "loss": 0.3758,
+      "step": 1985
+    },
+    {
+      "epoch": 0.27269918643369606,
+      "grad_norm": 0.15464863181114197,
+      "learning_rate": 2.4423167579978868e-08,
+      "loss": 0.4871,
+      "step": 1986
+    },
+    {
+      "epoch": 0.2728364972022931,
+      "grad_norm": 0.15227794647216797,
+      "learning_rate": 2.105886972787463e-08,
+      "loss": 0.4728,
+      "step": 1987
+    },
+    {
+      "epoch": 0.2729738079708901,
+      "grad_norm": 0.15416234731674194,
+      "learning_rate": 1.7943745480586417e-08,
+      "loss": 0.4987,
+      "step": 1988
+    },
+    {
+      "epoch": 0.2731111187394871,
+      "grad_norm": 0.15620464086532593,
+      "learning_rate": 1.5077802601826118e-08,
+      "loss": 0.4204,
+      "step": 1989
+    },
+    {
+      "epoch": 0.27324842950808415,
+      "grad_norm": 0.14840669929981232,
+      "learning_rate": 1.2461048234269079e-08,
+      "loss": 0.3661,
+      "step": 1990
+    },
+    {
+      "epoch": 0.2733857402766812,
+      "grad_norm": 0.1491561383008957,
+      "learning_rate": 1.0093488899554082e-08,
+      "loss": 0.3791,
+      "step": 1991
+    },
+    {
+      "epoch": 0.27352305104527824,
+      "grad_norm": 0.13692405819892883,
+      "learning_rate": 7.975130498238948e-09,
+      "loss": 0.3049,
+      "step": 1992
+    },
+    {
+      "epoch": 0.27366036181387526,
+      "grad_norm": 0.19245244562625885,
+      "learning_rate": 6.105978309856042e-09,
+      "loss": 0.4476,
+      "step": 1993
+    },
+    {
+      "epoch": 0.2737976725824723,
+      "grad_norm": 0.19619667530059814,
+      "learning_rate": 4.486036992790155e-09,
+      "loss": 0.4232,
+      "step": 1994
+    },
+    {
+      "epoch": 0.2739349833510693,
+      "grad_norm": 0.1624627262353897,
+      "learning_rate": 3.115310584367315e-09,
+      "loss": 0.3799,
+      "step": 1995
+    },
+    {
+      "epoch": 0.2740722941196663,
+      "grad_norm": 0.14835092425346375,
+      "learning_rate": 1.99380250079928e-09,
+      "loss": 0.2921,
+      "step": 1996
+    },
+    {
+      "epoch": 0.27420960488826335,
+      "grad_norm": 0.16958799958229065,
+      "learning_rate": 1.1215155371835373e-09,
+      "loss": 0.3982,
+      "step": 1997
+    },
+    {
+      "epoch": 0.27434691565686037,
+      "grad_norm": 0.17734107375144958,
+      "learning_rate": 4.984518674699956e-10,
+      "loss": 0.415,
+      "step": 1998
+    },
+    {
+      "epoch": 0.2744842264254574,
+      "grad_norm": 0.18290874361991882,
+      "learning_rate": 1.2461304450539502e-10,
+      "loss": 0.4189,
+      "step": 1999
+    },
+    {
+      "epoch": 0.27462153719405447,
+      "grad_norm": 0.18318958580493927,
+      "learning_rate": 0.0,
+      "loss": 0.3441,
+      "step": 2000
+    },
+    {
+      "epoch": 0.27462153719405447,
+      "eval_loss": 0.37363573908805847,
+      "eval_runtime": 89.1948,
+      "eval_samples_per_second": 13.342,
+      "eval_steps_per_second": 6.671,
+      "step": 2000
     }
   ],
   "logging_steps": 1,
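
The 358 added lines are fifty per-step training records (steps 1951-2000) followed by one evaluation record at step 2000. Note that learning_rate decays smoothly from 2.99e-07 down to exactly 0.0 at step 2000, so the schedule is exhausted at this checkpoint. A minimal sketch of tabulating the tail of log_history, assuming the JSON was downloaded and pandas is available:

# Minimal sketch: load the trainer state and tabulate recent steps.
import json
import pandas as pd

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

logs = pd.DataFrame(state["log_history"])
train = logs.dropna(subset=["loss"])  # eval rows carry eval_loss instead
print(train[["step", "loss", "learning_rate", "grad_norm"]].tail(3))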
@@ -13991,7 +14349,7 @@
         "early_stopping_threshold": 0.0
       },
       "attributes": {
-        "early_stopping_patience_counter":
+        "early_stopping_patience_counter": 3
       }
     },
     "TrainerControl": {
@@ -14000,12 +14358,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop":
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 2.
+  "total_flos": 2.09999650455552e+16,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null
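
These control fields explain why step 2000 ends the run: best_metric 0.3732219636440277 was set at checkpoint-1850, the step-2000 eval_loss of 0.37363573908805847 is not an improvement, the patience counter has reached 3, and should_training_stop is now true just as the schedule runs out. A minimal sketch of the patience rule these fields track; it mirrors the semantics of transformers' EarlyStoppingCallback rather than its exact code:

# Minimal sketch of the patience update behind the fields above.
def update_patience(best, eval_metric, counter, threshold=0.0):
    # lower eval_loss is better; threshold 0.0 means any strict improvement
    if eval_metric < best - threshold:
        return eval_metric, 0          # new best: reset the counter
    return best, counter + 1           # no improvement: count it

best, counter = 0.3732219636440277, 2  # state before the step-2000 eval
best, counter = update_patience(best, 0.37363573908805847, counter)
print(best, counter)  # counter hits 3 -> training stops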