Adapter based on Ministral 3B instruct
- adapter_config.json +38 -0
- adapters.safetensors +3 -0

adapter_config.json
ADDED
@@ -0,0 +1,38 @@
+{
+    "adapter_path": "jumbo_adapters",
+    "batch_size": 2,
+    "config": null,
+    "data": "cleaned_data/",
+    "fine_tune_type": "lora",
+    "grad_checkpoint": false,
+    "iters": 10000,
+    "learning_rate": 2e-05,
+    "lora_parameters": {
+        "keys": [
+            "mlp.gate_proj",
+            "mlp.down_proj",
+            "self_attn.q_proj",
+            "mlp.up_proj",
+            "self_attn.o_proj",
+            "self_attn.v_proj",
+            "self_attn.k_proj"
+        ],
+        "rank": 8,
+        "alpha": 8,
+        "dropout": 0.05,
+        "scale": 16.0
+    },
+    "lr_schedule": null,
+    "max_seq_length": 2048,
+    "model": "ministral/Ministral-3b-instruct",
+    "num_layers": 14,
+    "resume_adapter_file": null,
+    "save_every": 100,
+    "seed": 0,
+    "steps_per_eval": 200,
+    "steps_per_report": 10,
+    "test": true,
+    "test_batches": 500,
+    "train": true,
+    "val_batches": 25
+}
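For reference, these keys match the argument set of mlx-lm's LoRA trainer (mlx_lm.lora), which writes an adapter_config.json of exactly this shape next to adapters.safetensors. A minimal usage sketch, assuming mlx-lm is installed and this repo has been cloned locally with Git LFS; note that adapter_path should point at the directory holding the two adapter files (the "jumbo_adapters" value recorded above was the training-time output directory, not a path in this repo):

# Minimal sketch (assumptions: mlx-lm installed, repo cloned with git-lfs,
# script run from the checkout so "." contains both adapter files).
from mlx_lm import load, generate

model, tokenizer = load(
    "ministral/Ministral-3b-instruct",  # base model, from "model" above
    adapter_path=".",                   # dir with adapter_config.json + adapters.safetensors
)
print(generate(model, tokenizer, prompt="Hello!", max_tokens=64))
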
adapters.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:004a9e19d8d2ae1a2e8a0309eb34c33938e4488a964eaee886f71d9b049f8ee7
+size 36721232
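The entry above is a Git LFS pointer stub; the actual ~36.7 MB weights file is fetched on an LFS-enabled clone. A quick sketch for inspecting the stored LoRA tensors once the real file is pulled (assumes the safetensors Python package; the file name comes from this commit):

# Sketch: list the tensors stored in the adapter file.
from safetensors import safe_open

with safe_open("adapters.safetensors", framework="numpy") as f:
    for name in f.keys():
        t = f.get_tensor(name)
        print(name, tuple(t.shape), t.dtype)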