dada22231 committed on
Commit cab4168 · verified · 1 Parent(s): 540feec

Training in progress, step 25

adapter_config.json CHANGED
@@ -13,9 +13,9 @@
   "layers_pattern": null,
   "layers_to_transform": null,
   "loftq_config": {},
- "lora_alpha": 64,
+ "lora_alpha": 16,
   "lora_bias": false,
- "lora_dropout": 0.1,
+ "lora_dropout": 0.05,
   "megatron_config": null,
   "megatron_core": "megatron.core",
   "modules_to_save": [
@@ -23,16 +23,16 @@
   "lm_head"
   ],
   "peft_type": "LORA",
- "r": 32,
+ "r": 8,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
- "o_proj",
- "gate_proj",
+ "k_proj",
+ "v_proj",
   "down_proj",
   "q_proj",
- "v_proj",
- "k_proj",
+ "gate_proj",
+ "o_proj",
   "up_proj"
   ],
   "task_type": "CAUSAL_LM",
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
   version https://git-lfs.github.com/spec/v1
- oid sha256:801d6e5ab9e70ed5ab34dc23a5bac023be3e58ce64aa1f6c541f061ccf62330a
- size 859925696
+ oid sha256:bb82cc31951c6b54abacb82fd761f802cec6ba69e6918bdad64f4f573b062f95
+ size 608266288
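
The adapter checkpoint shrinks from roughly 860 MB to roughly 608 MB, consistent with the lower rank; most of the remaining size comes from the full modules_to_save copies rather than the LoRA factors. A short sketch for inspecting what the file actually contains, assuming a local download of adapter_model.safetensors:

from safetensors import safe_open

# List every tensor in the adapter checkpoint with its shape.
# LoRA factors show up as *.lora_A.* / *.lora_B.*, saved modules as *.modules_to_save.*.
with safe_open("adapter_model.safetensors", framework="pt", device="cpu") as f:
    for name in f.keys():
        print(name, f.get_slice(name).get_shape())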
config.json CHANGED
@@ -1,27 +1,30 @@
   {
   "_attn_implementation_autoset": true,
   "architectures": [
- "MistralForCausalLM"
+ "LlamaForCausalLM"
   ],
+ "attention_bias": false,
   "attention_dropout": 0.0,
   "bos_token_id": 1,
- "eos_token_id": 32000,
+ "eos_token_id": 2,
   "head_dim": 128,
   "hidden_act": "silu",
   "hidden_size": 4096,
   "initializer_range": 0.02,
- "intermediate_size": 14336,
- "max_position_embeddings": 32768,
- "model_type": "mistral",
+ "intermediate_size": 11008,
+ "max_position_embeddings": 16384,
+ "mlp_bias": false,
+ "model_type": "llama",
   "num_attention_heads": 32,
   "num_hidden_layers": 32,
- "num_key_value_heads": 8,
+ "num_key_value_heads": 32,
+ "pretraining_tp": 1,
   "rms_norm_eps": 1e-05,
- "rope_theta": 10000.0,
- "sliding_window": 4096,
+ "rope_scaling": null,
+ "rope_theta": 1000000,
   "tie_word_embeddings": false,
   "torch_dtype": "bfloat16",
   "transformers_version": "4.51.3",
   "use_cache": false,
- "vocab_size": 32002
+ "vocab_size": 32016
   }
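
The config.json change swaps the base architecture from MistralForCausalLM to LlamaForCausalLM, with a larger RoPE base (rope_theta 1000000), 16384 max_position_embeddings, and a 32016-entry vocabulary. A hedged sketch for sanity-checking that a locally chosen base checkpoint matches these fields before attaching the adapter; "base_model_id" is again a placeholder, since this commit does not identify the base model:

from transformers import AutoConfig

cfg = AutoConfig.from_pretrained("base_model_id")  # placeholder id
expected = {
    "model_type": "llama",
    "vocab_size": 32016,
    "max_position_embeddings": 16384,
    "rope_theta": 1000000,
    "num_key_value_heads": 32,
}
for key, value in expected.items():
    assert getattr(cfg, key) == value, f"{key}: {getattr(cfg, key)} != {value}"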
training_args.bin CHANGED
@@ -1,3 +1,3 @@
   version https://git-lfs.github.com/spec/v1
- oid sha256:115579868250659618667b10584031019b0cb30ee6fe91204e7a38db3cb6c03a
+ oid sha256:21300eebe6727d3bd918fcf96ba98a35295187962e2bb4d2d04c925e0748730f
   size 7736
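
training_args.bin is the pickled transformers.TrainingArguments object saved by the Trainer, which is why its hash changed while the size stayed at 7736 bytes. To see which hyperparameters differ between the two revisions, the file can be loaded directly; this assumes a local download of training_args.bin from a source you trust:

import torch

# TrainingArguments is pickled, so weights_only must be disabled;
# only do this for files you trust.
args = torch.load("training_args.bin", weights_only=False)
print(args)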