{ "adapter_path": "jumbo_adapters", "batch_size": 2, "config": null, "data": "cleaned_data/", "fine_tune_type": "lora", "grad_checkpoint": false, "iters": 10000, "learning_rate": 2e-05, "lora_parameters": { "keys": [ "mlp.gate_proj", "mlp.down_proj", "self_attn.q_proj", "mlp.up_proj", "self_attn.o_proj", "self_attn.v_proj", "self_attn.k_proj" ], "rank": 8, "alpha": 8, "dropout": 0.05, "scale": 16.0 }, "lr_schedule": null, "max_seq_length": 2048, "model": "ministral/Ministral-3b-instruct", "num_layers": 14, "resume_adapter_file": null, "save_every": 100, "seed": 0, "steps_per_eval": 200, "steps_per_report": 10, "test": true, "test_batches": 500, "train": true, "val_batches": 25 }