```json
{
  "experiment_key": "base",
  "save_safetensors": true,
  "max_shard_size": "10GB",
  "local_rank": 0,
  "use_gradient_checkpointing": true,
  "trainer_key": "lm",
  "force_fp32": false,
  "force_fp16": false,
  "from_gptq": false,
  "huggingface_hub_token": null,
  "single_gpu": null,
  "master_port": 9994,
  "deepspeed_stage": null,
  "deepspeed_config_path": null,
  "fsdp_strategy": "",
  "fsdp_offload": true,
  "seed": 42,
  "stabilize": false,
  "norm_fp32": false,
  "path_to_env_file": "./.env",
  "prepare_dataset": true,
  "lora_hub_model_id": null,
  "lora_model_local_path": null,
  "fused_model_local_path": null,
  "fuse_after_training": false,
  "quantization_dataset_id": null,
  "quantization_max_samples": 1024,
  "quantized_model_path": "./quantized_model/",
  "quantized_hub_model_id": null,
  "quantized_hub_private_repo": true,
  "dataset_key": "soda",
  "train_local_path_to_data": "./train.jsonl",
  "eval_local_path_to_data": null,
  "shuffle": true,
  "max_eval_samples": 1000,
  "add_eval_to_train_if_no_path": false,
  "tokenizer_name_or_path": null,
  "tokenizer_use_fast": null,
  "tokenizer_padding_side": null,
  "collator_key": "lm",
  "max_length": 2048,
  "model_name_or_path": "bn22/Mistral-7B-Instruct-v0.1-sharded",
  "push_to_hub_bos_add_bos_token": false,
  "use_flash_attention_2": false,
  "trust_remote_code": false,
  "device_map": null,
  "prepare_model_for_kbit_training": true,
  "offload_folder": null,
  "load_in_8bit": false,
  "load_in_4bit": true,
  "llm_int8_threshold": 6.0,
  "llm_int8_has_fp16_weight": true,
  "bnb_4bit_use_double_quant": true,
  "bnb_4bit_quant_type": "nf4",
  "bnb_quantize_after_model_init": false,
  "gptq_bits": 4,
  "gptq_group_size": 128,
  "gptq_disable_exllama": true,
  "apply_lora": true,
  "lora_rank": 8,
  "lora_alpha": 32,
  "lora_dropout": 0.1,
  "raw_lora_target_modules": "all",
  "output_dir": "./outputs/",
  "per_device_train_batch_size": 2,
  "do_eval": false,
  "per_device_eval_batch_size": null,
  "gradient_accumulation_steps": 2,
  "eval_accumulation_steps": null,
  "eval_delay": 0,
  "eval_steps": 1000,
  "warmup_steps": 5,
  "max_steps": 100,
  "num_train_epochs": 1,
  "learning_rate": 0.0002,
  "max_grad_norm": 1.0,
  "weight_decay": 0.001,
  "label_smoothing_factor": 0.0,
  "logging_steps": 1,
  "save_steps": 10,
  "save_total_limit": 1,
  "optim": "paged_adamw_8bit",
  "push_to_hub": true,
  "hub_model_id": "TachyHealthResearch/Thealth-Mistral",
  "hub_private_repo": false,
  "neftune_noise_alpha": null,
  "project_name": null,
  "report_to_wandb": false,
  "wandb_api_key": null,
  "wandb_project": null,
  "wandb_entity": null
}
```
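These keys follow the flat configuration style of QLoRA fine-tuning wrappers (they match the `Config` fields of the xllm library). To make the quantization, LoRA, and trainer settings concrete, below is a minimal sketch mapping them onto standard Hugging Face `transformers` and `peft` objects. This is not the library's own loader: the filename `config.json`, the fp16 compute dtype, and the explicit `target_modules` list are assumptions, not values taken from this file.

```python
import json

import torch
from peft import LoraConfig
from transformers import BitsAndBytesConfig, TrainingArguments

# Assumption: the JSON above is saved locally as config.json.
with open("config.json") as f:
    cfg = json.load(f)

# 4-bit NF4 quantization with double quantization, as configured above.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=cfg["load_in_4bit"],
    bnb_4bit_quant_type=cfg["bnb_4bit_quant_type"],
    bnb_4bit_use_double_quant=cfg["bnb_4bit_use_double_quant"],
    bnb_4bit_compute_dtype=torch.float16,  # assumption: compute dtype is not in the config
)

# LoRA adapter: rank 8, alpha 32, dropout 0.1.
# "raw_lora_target_modules": "all" means all linear layers; peft wants an
# explicit list, so the Mistral attention projections stand in here (assumption).
lora_config = LoraConfig(
    r=cfg["lora_rank"],
    lora_alpha=cfg["lora_alpha"],
    lora_dropout=cfg["lora_dropout"],
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],  # assumption
    task_type="CAUSAL_LM",
)

# Trainer settings: paged 8-bit AdamW, LR 2e-4, checkpoint every 10 steps.
training_args = TrainingArguments(
    output_dir=cfg["output_dir"],
    per_device_train_batch_size=cfg["per_device_train_batch_size"],
    gradient_accumulation_steps=cfg["gradient_accumulation_steps"],
    learning_rate=cfg["learning_rate"],
    warmup_steps=cfg["warmup_steps"],
    max_steps=cfg["max_steps"],
    num_train_epochs=cfg["num_train_epochs"],
    max_grad_norm=cfg["max_grad_norm"],
    weight_decay=cfg["weight_decay"],
    logging_steps=cfg["logging_steps"],
    save_steps=cfg["save_steps"],
    save_total_limit=cfg["save_total_limit"],
    optim=cfg["optim"],
    seed=cfg["seed"],
    push_to_hub=False,  # set True and authenticate to push to hub_model_id
)
```

Note that when both are set, `max_steps: 100` takes precedence over `num_train_epochs: 1`, so the run stops after 100 optimizer steps; with `save_steps: 10` and `save_total_limit: 1`, only the most recent checkpoint is kept on disk.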