---
# Training configuration (distillation / ranking model).
# NOTE(review): original file was collapsed onto a single line, which is not
# parseable YAML (plain scalars cannot contain ": "); structure reconstructed
# from the alphabetical key order of both the top level and the
# `optimization` sub-mapping — confirm against the consuming trainer.
accelerator: auto
checkpoint_interval: 100
loss: distillRankNET
max_grad_norm: 1.0
optimization:
  batch_size: 32
  eps: 1.0e-08
  lr: 1.0e-05
  max_epochs: 2000
  num_warmup_steps: 5000
  optimizer_name: adam-w
  # Regex patterns; presumably parameter names matching these are excluded
  # from weight decay / L2 regularization — verify against the optimizer
  # setup code. Quoted so the backslashes and trailing "$" are unambiguous.
  re_no_l2_regularization:
    - '\.bias$'
    - '\.LayerNorm\.'
  scheduler: true
  steps_per_epoch: 100
  warmup_min_factor: 0.0
  weight_decay: 0.0
precision: null
# Cluster resource-request expression; quoted because it contains "&",
# "(", and "=" — keeping it a plain scalar invites parser surprises.
requirements: 'duration=20h & cpu(cores=16) & cuda(mem=50G)'
sample_max: 0
sample_rate: 1.0
strategy: auto
validation: nanobeir
validation_interval: 50
validation_top_k: 100