| base_model: ./evo-storage/input_models/Yosegi-0603_3063110135 | |
| dtype: bfloat16 | |
| merge_method: breadcrumbs_ties | |
| parameters: | |
| int8_mask: 1.0 | |
| normalize: 0.0 | |
| slices: | |
| - sources: | |
| - layer_range: [0, 2] | |
| model: ./evo-storage/input_models/Yosegi-0601_1486698715 | |
| parameters: | |
| density: | |
| - filter: self_attn | |
| value: 1.0 | |
| - filter: mlp | |
| value: 1.0 | |
| - value: 1.0 | |
| gamma: | |
| - filter: self_attn | |
| value: -0.050387850856855765 | |
| - filter: mlp | |
| value: -0.17075015661203768 | |
| - value: -0.008041653902986862 | |
| weight: | |
| - filter: self_attn | |
| value: 0.0999312941470471 | |
| - filter: mlp | |
| value: 0.541727762184749 | |
| - value: 0.6837012779994258 | |
| - layer_range: [0, 2] | |
| model: ./evo-storage/input_models/Ninja-2B_JP_706546503 | |
| parameters: | |
| density: | |
| - filter: self_attn | |
| value: 0.8218846237599902 | |
| - filter: mlp | |
| value: 1.0 | |
| - value: 0.9254078866667358 | |
| gamma: | |
| - filter: self_attn | |
| value: -0.11213758231875963 | |
| - filter: mlp | |
| value: 0.021586098873668948 | |
| - value: -0.12827998218659437 | |
| weight: | |
| - filter: self_attn | |
| value: 0.40391646444657003 | |
| - filter: mlp | |
| value: 0.623121864641881 | |
| - value: 0.5967833694632534 | |
| - layer_range: [0, 2] | |
| model: ./evo-storage/input_models/Yosegi-0603_3063110135 | |
| - sources: | |
| - layer_range: [2, 4] | |
| model: ./evo-storage/input_models/Yosegi-0601_1486698715 | |
| parameters: | |
| density: | |
| - filter: self_attn | |
| value: 0.8079479346300947 | |
| - filter: mlp | |
| value: 1.0 | |
| - value: 0.710146185559419 | |
| gamma: | |
| - filter: self_attn | |
| value: 0.1383609589681566 | |
| - filter: mlp | |
| value: 0.21188532059635062 | |
| - value: 0.2994723556443468 | |
| weight: | |
| - filter: self_attn | |
| value: 0.48107070906079974 | |
| - filter: mlp | |
| value: 0.5848073552919492 | |
| - value: 0.4583842493359253 | |
| - layer_range: [2, 4] | |
| model: ./evo-storage/input_models/Ninja-2B_JP_706546503 | |
| parameters: | |
| density: | |
| - filter: self_attn | |
| value: 1.0 | |
| - filter: mlp | |
| value: 0.934378153535579 | |
| - value: 1.0 | |
| gamma: | |
| - filter: self_attn | |
| value: 0.073192612278188 | |
| - filter: mlp | |
| value: 0.07939126555063317 | |
| - value: -0.06891845030175699 | |
| weight: | |
| - filter: self_attn | |
| value: 0.32120386994101 | |
| - filter: mlp | |
| value: 0.5001108459121922 | |
| - value: 0.9138710221666694 | |
| - layer_range: [2, 4] | |
| model: ./evo-storage/input_models/Yosegi-0603_3063110135 | |
| - sources: | |
| - layer_range: [4, 6] | |
| model: ./evo-storage/input_models/Yosegi-0601_1486698715 | |
| parameters: | |
| density: | |
| - filter: self_attn | |
| value: 1.0 | |
| - filter: mlp | |
| value: 0.7237519222177541 | |
| - value: 0.776951124863642 | |
| gamma: | |
| - filter: self_attn | |
| value: -0.2265121048274062 | |
| - filter: mlp | |
| value: -0.1757947421960496 | |
| - value: -0.11401593728931929 | |
| weight: | |
| - filter: self_attn | |
| value: 0.6448742737026658 | |
| - filter: mlp | |
| value: 0.13809748641457986 | |
| - value: 0.3950550285769662 | |
| - layer_range: [4, 6] | |
| model: ./evo-storage/input_models/Ninja-2B_JP_706546503 | |
| parameters: | |
| density: | |
| - filter: self_attn | |
| value: 0.9649359194114893 | |
| - filter: mlp | |
| value: 0.916637032428399 | |
| - value: 1.0 | |
| gamma: | |
| - filter: self_attn | |
| value: -0.16291684846287688 | |
| - filter: mlp | |
| value: -0.19013548712121703 | |
| - value: 0.038409066391918795 | |
| weight: | |
| - filter: self_attn | |
| value: 0.1977358472772336 | |
| - filter: mlp | |
| value: 0.22661167907612348 | |
| - value: 0.6426575016448257 | |
| - layer_range: [4, 6] | |
| model: ./evo-storage/input_models/Yosegi-0603_3063110135 | |
| - sources: | |
| - layer_range: [6, 8] | |
| model: ./evo-storage/input_models/Yosegi-0601_1486698715 | |
| parameters: | |
| density: | |
| - filter: self_attn | |
| value: 0.8727809666891416 | |
| - filter: mlp | |
| value: 1.0 | |
| - value: 0.5160677785559116 | |
| gamma: | |
| - filter: self_attn | |
| value: 0.14245180617134273 | |
| - filter: mlp | |
| value: 0.08189992601998919 | |
| - value: -0.1038827997670827 | |
| weight: | |
| - filter: self_attn | |
| value: 0.23575676914257698 | |
| - filter: mlp | |
| value: 0.4047231670507743 | |
| - value: 0.34207794631274374 | |
| - layer_range: [6, 8] | |
| model: ./evo-storage/input_models/Ninja-2B_JP_706546503 | |
| parameters: | |
| density: | |
| - filter: self_attn | |
| value: 1.0 | |
| - filter: mlp | |
| value: 1.0 | |
| - value: 1.0 | |
| gamma: | |
| - filter: self_attn | |
| value: 0.576775501046583 | |
| - filter: mlp | |
| value: -0.046028636298718645 | |
| - value: -0.024161321403060265 | |
| weight: | |
| - filter: self_attn | |
| value: 0.833089842843994 | |
| - filter: mlp | |
| value: 0.5434667434613458 | |
| - value: 0.2946693008513797 | |
| - layer_range: [6, 8] | |
| model: ./evo-storage/input_models/Yosegi-0603_3063110135 | |
| - sources: | |
| - layer_range: [8, 10] | |
| model: ./evo-storage/input_models/Yosegi-0601_1486698715 | |
| parameters: | |
| density: | |
| - filter: self_attn | |
| value: 1.0 | |
| - filter: mlp | |
| value: 1.0 | |
| - value: 0.9930269337531187 | |
| gamma: | |
| - filter: self_attn | |
| value: 0.4549980941970383 | |
| - filter: mlp | |
| value: 0.10362988739411173 | |
| - value: -0.43800391668559174 | |
| weight: | |
| - filter: self_attn | |
| value: 0.19663450954683193 | |
| - filter: mlp | |
| value: 0.16783989984505265 | |
| - value: 0.7465091417598162 | |
| - layer_range: [8, 10] | |
| model: ./evo-storage/input_models/Ninja-2B_JP_706546503 | |
| parameters: | |
| density: | |
| - filter: self_attn | |
| value: 0.797370597380894 | |
| - filter: mlp | |
| value: 1.0 | |
| - value: 1.0 | |
| gamma: | |
| - filter: self_attn | |
| value: -0.0665958634205702 | |
| - filter: mlp | |
| value: -0.058297473060129834 | |
| - value: -0.38206760673090134 | |
| weight: | |
| - filter: self_attn | |
| value: 0.7015967347604024 | |
| - filter: mlp | |
| value: 0.7733694864324641 | |
| - value: 0.7636921732342238 | |
| - layer_range: [8, 10] | |
| model: ./evo-storage/input_models/Yosegi-0603_3063110135 | |
| - sources: | |
| - layer_range: [10, 12] | |
| model: ./evo-storage/input_models/Yosegi-0601_1486698715 | |
| parameters: | |
| density: | |
| - filter: self_attn | |
| value: 0.8047576867589878 | |
| - filter: mlp | |
| value: 0.8852533319203653 | |
| - value: 0.7707342647603538 | |
| gamma: | |
| - filter: self_attn | |
| value: -0.054343999574509694 | |
| - filter: mlp | |
| value: -0.3465154355167133 | |
| - value: 0.022315854655582765 | |
| weight: | |
| - filter: self_attn | |
| value: 0.4396484757291151 | |
| - filter: mlp | |
| value: 0.34318396468602314 | |
| - value: 0.8236034746664869 | |
| - layer_range: [10, 12] | |
| model: ./evo-storage/input_models/Ninja-2B_JP_706546503 | |
| parameters: | |
| density: | |
| - filter: self_attn | |
| value: 0.9058471193805165 | |
| - filter: mlp | |
| value: 1.0 | |
| - value: 1.0 | |
| gamma: | |
| - filter: self_attn | |
| value: 0.1221058588826469 | |
| - filter: mlp | |
| value: -0.4004985640890659 | |
| - value: 0.3219195440395816 | |
| weight: | |
| - filter: self_attn | |
| value: 0.3565443612269864 | |
| - filter: mlp | |
| value: 0.2817057075232181 | |
| - value: 0.5934890337808251 | |
| - layer_range: [10, 12] | |
| model: ./evo-storage/input_models/Yosegi-0603_3063110135 | |
| - sources: | |
| - layer_range: [12, 14] | |
| model: ./evo-storage/input_models/Yosegi-0601_1486698715 | |
| parameters: | |
| density: | |
| - filter: self_attn | |
| value: 1.0 | |
| - filter: mlp | |
| value: 1.0 | |
| - value: 1.0 | |
| gamma: | |
| - filter: self_attn | |
| value: -0.027897116191693133 | |
| - filter: mlp | |
| value: -0.1765379388255607 | |
| - value: 0.09108936063176161 | |
| weight: | |
| - filter: self_attn | |
| value: 0.4499753137521779 | |
| - filter: mlp | |
| value: 0.901296236087911 | |
| - value: 0.3548680126954006 | |
| - layer_range: [12, 14] | |
| model: ./evo-storage/input_models/Ninja-2B_JP_706546503 | |
| parameters: | |
| density: | |
| - filter: self_attn | |
| value: 0.8973815150776497 | |
| - filter: mlp | |
| value: 0.6029953465961999 | |
| - value: 1.0 | |
| gamma: | |
| - filter: self_attn | |
| value: 0.10393082898402586 | |
| - filter: mlp | |
| value: 0.15993577688878796 | |
| - value: 0.011410411917833683 | |
| weight: | |
| - filter: self_attn | |
| value: 0.2211644023056492 | |
| - filter: mlp | |
| value: 0.5677387594231849 | |
| - value: 0.1316535663010981 | |
| - layer_range: [12, 14] | |
| model: ./evo-storage/input_models/Yosegi-0603_3063110135 | |
| - sources: | |
| - layer_range: [14, 16] | |
| model: ./evo-storage/input_models/Yosegi-0601_1486698715 | |
| parameters: | |
| density: | |
| - filter: self_attn | |
| value: 0.9584597245055072 | |
| - filter: mlp | |
| value: 1.0 | |
| - value: 1.0 | |
| gamma: | |
| - filter: self_attn | |
| value: -0.17789727632680347 | |
| - filter: mlp | |
| value: 0.2182263440314275 | |
| - value: 0.1449547656126498 | |
| weight: | |
| - filter: self_attn | |
| value: 0.4551004762874224 | |
| - filter: mlp | |
| value: 0.9182082826762857 | |
| - value: 0.3736989395186422 | |
| - layer_range: [14, 16] | |
| model: ./evo-storage/input_models/Ninja-2B_JP_706546503 | |
| parameters: | |
| density: | |
| - filter: self_attn | |
| value: 0.7414465107848625 | |
| - filter: mlp | |
| value: 1.0 | |
| - value: 0.7894887419395906 | |
| gamma: | |
| - filter: self_attn | |
| value: -0.07343933395880992 | |
| - filter: mlp | |
| value: 0.250800731630588 | |
| - value: -0.2948778134297542 | |
| weight: | |
| - filter: self_attn | |
| value: 0.43125199001016495 | |
| - filter: mlp | |
| value: 0.6182726353394477 | |
| - value: 0.838902157446268 | |
| - layer_range: [14, 16] | |
| model: ./evo-storage/input_models/Yosegi-0603_3063110135 | |
| - sources: | |
| - layer_range: [16, 18] | |
| model: ./evo-storage/input_models/Yosegi-0601_1486698715 | |
| parameters: | |
| density: | |
| - filter: self_attn | |
| value: 0.9474287877268394 | |
| - filter: mlp | |
| value: 1.0 | |
| - value: 0.9613380133344519 | |
| gamma: | |
| - filter: self_attn | |
| value: -0.08608895546593046 | |
| - filter: mlp | |
| value: -0.07275416053291164 | |
| - value: -0.5796137860399382 | |
| weight: | |
| - filter: self_attn | |
| value: 0.5593420897751296 | |
| - filter: mlp | |
| value: 0.7339447992880666 | |
| - value: 0.5447558586689005 | |
| - layer_range: [16, 18] | |
| model: ./evo-storage/input_models/Ninja-2B_JP_706546503 | |
| parameters: | |
| density: | |
| - filter: self_attn | |
| value: 0.9321536960575384 | |
| - filter: mlp | |
| value: 1.0 | |
| - value: 0.9613033408813294 | |
| gamma: | |
| - filter: self_attn | |
| value: 0.20610728738224296 | |
| - filter: mlp | |
| value: 0.2002206706624053 | |
| - value: -0.45349278793293785 | |
| weight: | |
| - filter: self_attn | |
| value: 0.16162975594196963 | |
| - filter: mlp | |
| value: 0.21262726992327483 | |
| - value: 0.061213622827234075 | |
| - layer_range: [16, 18] | |
| model: ./evo-storage/input_models/Yosegi-0603_3063110135 | |
| - sources: | |
| - layer_range: [18, 20] | |
| model: ./evo-storage/input_models/Yosegi-0601_1486698715 | |
| parameters: | |
| density: | |
| - filter: self_attn | |
| value: 1.0 | |
| - filter: mlp | |
| value: 1.0 | |
| - value: 1.0 | |
| gamma: | |
| - filter: self_attn | |
| value: 0.03922456593148313 | |
| - filter: mlp | |
| value: 0.3318035822806869 | |
| - value: -0.10373990685028205 | |
| weight: | |
| - filter: self_attn | |
| value: 0.8254441016674987 | |
| - filter: mlp | |
| value: 0.4568039342431161 | |
| - value: 0.3152648515747969 | |
| - layer_range: [18, 20] | |
| model: ./evo-storage/input_models/Ninja-2B_JP_706546503 | |
| parameters: | |
| density: | |
| - filter: self_attn | |
| value: 1.0 | |
| - filter: mlp | |
| value: 1.0 | |
| - value: 0.9807358937293073 | |
| gamma: | |
| - filter: self_attn | |
| value: -0.22734036563128657 | |
| - filter: mlp | |
| value: 0.26113222150270854 | |
| - value: 0.17739039022957015 | |
| weight: | |
| - filter: self_attn | |
| value: 0.33759130475641996 | |
| - filter: mlp | |
| value: 0.616639215544168 | |
| - value: 0.47560658618977714 | |
| - layer_range: [18, 20] | |
| model: ./evo-storage/input_models/Yosegi-0603_3063110135 | |
| - sources: | |
| - layer_range: [20, 22] | |
| model: ./evo-storage/input_models/Yosegi-0601_1486698715 | |
| parameters: | |
| density: | |
| - filter: self_attn | |
| value: 0.9394514442960196 | |
| - filter: mlp | |
| value: 1.0 | |
| - value: 0.9885037757465567 | |
| gamma: | |
| - filter: self_attn | |
| value: -0.17365709450334324 | |
| - filter: mlp | |
| value: 0.0712279381144505 | |
| - value: 0.11809665485306464 | |
| weight: | |
| - filter: self_attn | |
| value: 0.485610337254665 | |
| - filter: mlp | |
| value: 0.8406593173801935 | |
| - value: 0.5024102481819739 | |
| - layer_range: [20, 22] | |
| model: ./evo-storage/input_models/Ninja-2B_JP_706546503 | |
| parameters: | |
| density: | |
| - filter: self_attn | |
| value: 1.0 | |
| - filter: mlp | |
| value: 1.0 | |
| - value: 1.0 | |
| gamma: | |
| - filter: self_attn | |
| value: -0.09980202641768818 | |
| - filter: mlp | |
| value: 0.051454493742856926 | |
| - value: 0.14619126408666103 | |
| weight: | |
| - filter: self_attn | |
| value: 0.54772456079406 | |
| - filter: mlp | |
| value: 0.3440893571099615 | |
| - value: 0.3747271233512448 | |
| - layer_range: [20, 22] | |
| model: ./evo-storage/input_models/Yosegi-0603_3063110135 | |
| - sources: | |
| - layer_range: [22, 24] | |
| model: ./evo-storage/input_models/Yosegi-0601_1486698715 | |
| parameters: | |
| density: | |
| - filter: self_attn | |
| value: 1.0 | |
| - filter: mlp | |
| value: 0.9474712362889293 | |
| - value: 1.0 | |
| gamma: | |
| - filter: self_attn | |
| value: -0.16020032978118146 | |
| - filter: mlp | |
| value: -0.025085248873309034 | |
| - value: 0.06046174910893976 | |
| weight: | |
| - filter: self_attn | |
| value: 0.8654189362345427 | |
| - filter: mlp | |
| value: 0.6344956382288498 | |
| - value: 0.6383979001549549 | |
| - layer_range: [22, 24] | |
| model: ./evo-storage/input_models/Ninja-2B_JP_706546503 | |
| parameters: | |
| density: | |
| - filter: self_attn | |
| value: 0.8240762427167851 | |
| - filter: mlp | |
| value: 1.0 | |
| - value: 0.9004913821398048 | |
| gamma: | |
| - filter: self_attn | |
| value: -0.12224186789525764 | |
| - filter: mlp | |
| value: -0.25877585460700525 | |
| - value: 0.35149388360871714 | |
| weight: | |
| - filter: self_attn | |
| value: 0.4294356408713786 | |
| - filter: mlp | |
| value: 0.3920647298630233 | |
| - value: 0.795891295390721 | |
| - layer_range: [22, 24] | |
| model: ./evo-storage/input_models/Yosegi-0603_3063110135 | |
| - sources: | |
| - layer_range: [24, 26] | |
| model: ./evo-storage/input_models/Yosegi-0601_1486698715 | |
| parameters: | |
| density: | |
| - filter: self_attn | |
| value: 1.0 | |
| - filter: mlp | |
| value: 1.0 | |
| - value: 1.0 | |
| gamma: | |
| - filter: self_attn | |
| value: 0.16915580088030202 | |
| - filter: mlp | |
| value: 0.2602652727555053 | |
| - value: 0.16985672723305376 | |
| weight: | |
| - filter: self_attn | |
| value: 0.420377024485687 | |
| - filter: mlp | |
| value: 0.3401141209432324 | |
| - value: 0.4953511256159331 | |
| - layer_range: [24, 26] | |
| model: ./evo-storage/input_models/Ninja-2B_JP_706546503 | |
| parameters: | |
| density: | |
| - filter: self_attn | |
| value: 0.7290652609253236 | |
| - filter: mlp | |
| value: 1.0 | |
| - value: 1.0 | |
| gamma: | |
| - filter: self_attn | |
| value: -0.1039167464696765 | |
| - filter: mlp | |
| value: -0.18476572570059685 | |
| - value: 0.1221387313921081 | |
| weight: | |
| - filter: self_attn | |
| value: 0.2925002157134928 | |
| - filter: mlp | |
| value: 0.3854740639588027 | |
| - value: 0.555448110317977 | |
| - layer_range: [24, 26] | |
| model: ./evo-storage/input_models/Yosegi-0603_3063110135 | |
| - sources: | |
| - layer_range: [26, 28] | |
| model: ./evo-storage/input_models/Yosegi-0601_1486698715 | |
| parameters: | |
| density: | |
| - filter: self_attn | |
| value: 1.0 | |
| - filter: mlp | |
| value: 0.9104496350690235 | |
| - value: 1.0 | |
| gamma: | |
| - filter: self_attn | |
| value: 0.24831264214235005 | |
| - filter: mlp | |
| value: -0.03903149241855605 | |
| - value: 0.14189425093398259 | |
| weight: | |
| - filter: self_attn | |
| value: 0.7685811138035815 | |
| - filter: mlp | |
| value: 0.06535011571274918 | |
| - value: 0.696502559577317 | |
| - layer_range: [26, 28] | |
| model: ./evo-storage/input_models/Ninja-2B_JP_706546503 | |
| parameters: | |
| density: | |
| - filter: self_attn | |
| value: 0.9236218028490522 | |
| - filter: mlp | |
| value: 1.0 | |
| - value: 1.0 | |
| gamma: | |
| - filter: self_attn | |
| value: -0.2451400735890047 | |
| - filter: mlp | |
| value: -0.21555851418482214 | |
| - value: 0.020418471695148876 | |
| weight: | |
| - filter: self_attn | |
| value: 0.451368534421561 | |
| - filter: mlp | |
| value: 0.27412879847687055 | |
| - value: 0.18339776770537336 | |
| - layer_range: [26, 28] | |
| model: ./evo-storage/input_models/Yosegi-0603_3063110135 | |
| - sources: | |
| - layer_range: [28, 30] | |
| model: ./evo-storage/input_models/Yosegi-0601_1486698715 | |
| parameters: | |
| density: | |
| - filter: self_attn | |
| value: 0.8590812961904566 | |
| - filter: mlp | |
| value: 1.0 | |
| - value: 1.0 | |
| gamma: | |
| - filter: self_attn | |
| value: -0.06934549536310654 | |
| - filter: mlp | |
| value: -0.28464693250998063 | |
| - value: -0.0588491947891552 | |
| weight: | |
| - filter: self_attn | |
| value: 0.26716389671655294 | |
| - filter: mlp | |
| value: 0.8228280162386532 | |
| - value: 0.24197568479527135 | |
| - layer_range: [28, 30] | |
| model: ./evo-storage/input_models/Ninja-2B_JP_706546503 | |
| parameters: | |
| density: | |
| - filter: self_attn | |
| value: 0.7277181780542642 | |
| - filter: mlp | |
| value: 0.74166025738732 | |
| - value: 1.0 | |
| gamma: | |
| - filter: self_attn | |
| value: 0.1772650150670655 | |
| - filter: mlp | |
| value: 0.06545031487123437 | |
| - value: -0.28681451125993446 | |
| weight: | |
| - filter: self_attn | |
| value: 0.5781944040541174 | |
| - filter: mlp | |
| value: 0.2288692970435767 | |
| - value: 0.689751088930503 | |
| - layer_range: [28, 30] | |
| model: ./evo-storage/input_models/Yosegi-0603_3063110135 | |
| - sources: | |
| - layer_range: [30, 32] | |
| model: ./evo-storage/input_models/Yosegi-0601_1486698715 | |
| parameters: | |
| density: | |
| - filter: self_attn | |
| value: 0.8177341862620365 | |
| - filter: mlp | |
| value: 0.8875629677599377 | |
| - value: 1.0 | |
| gamma: | |
| - filter: self_attn | |
| value: -0.06572527259889459 | |
| - filter: mlp | |
| value: -0.18979543285938766 | |
| - value: -0.24122036571646263 | |
| weight: | |
| - filter: self_attn | |
| value: 0.5818433594657613 | |
| - filter: mlp | |
| value: 0.36676821100234736 | |
| - value: 0.3580688869263428 | |
| - layer_range: [30, 32] | |
| model: ./evo-storage/input_models/Ninja-2B_JP_706546503 | |
| parameters: | |
| density: | |
| - filter: self_attn | |
| value: 0.8306036003344672 | |
| - filter: mlp | |
| value: 0.6993970248745297 | |
| - value: 1.0 | |
| gamma: | |
| - filter: self_attn | |
| value: -0.20599853236581384 | |
| - filter: mlp | |
| value: -0.2001187634455465 | |
| - value: -0.07654635090020837 | |
| weight: | |
| - filter: self_attn | |
| value: 0.37120677279712305 | |
| - filter: mlp | |
| value: 0.13105486609905853 | |
| - value: 0.7204857820148367 | |
| - layer_range: [30, 32] | |
| model: ./evo-storage/input_models/Yosegi-0603_3063110135 | |
| tokenizer_source: union |