{
  "core": {
    "model_name": "Qwen/Qwen2.5-3B-Instruct",
    "lora_rank": 16,
    "max_seq_length": 2048,
    "load_in_4bit": false,
    "model_dir": "Final/Qwen2.5-3B-Instruct-calib-grpo-low",
    "dataset_name": "gsm8k",
    "dataset_split": "train",
    "test_dataset_split": "test",
    "trainer_type": "grpo_dpo",
    "calibration": true
  },
  "training": {
    "learning_rate": 5e-06,
    "weight_decay": 0.1,
    "max_grad_norm": 0.1,
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 4,
    "max_steps": 1000,
    "seed": 0
  },
  "sched_optim": {
    "lr_scheduler_type": "cosine",
    "warmup_ratio": 0.1,
    "optim": "adamw_8bit",
    "adam_beta1": 0.9,
    "adam_beta2": 0.99
  },
  "generation": {
    "num_generations": 8,
    "max_prompt_length": 1024,
    "max_completion_length": 1024
  },
  "algorithm": {
    "loss_type": "grpo",
    "epsilon": 0.2,
    "epsilon_high": 0.2,
    "mask_truncated_completions": false,
    "scale_rewards": "group",
    "importance_sampling_level": "token"
  },
  "dpo": {
    "lambda_pair": 0.01,
    "pair_threshold": 2.0,
    "beta_dpo": 0.2,
    "pair_mining": "all",
    "max_pairs_per_group": 6,
    "implicit_ref": true
  },
  "logging": {
    "logging_steps": 1,
    "save_steps": 50,
    "report_to": "wandb",
    "wandb_api_key": null
  }
}