| accelerator: auto | |
| checkpoint_interval: 100 | |
| loss: distillRankNET | |
| max_grad_norm: 1.0 | |
| optimization: | |
| batch_size: 64 | |
| eps: 1.0e-08 | |
| lr: 7.0e-06 | |
| max_epochs: 2000 | |
| num_warmup_steps: 5000 | |
| optimizer_name: adam-w | |
| re_no_l2_regularization: | |
| - \.bias$ | |
| - \.LayerNorm\. | |
| scheduler: true | |
| steps_per_epoch: 100 | |
| warmup_min_factor: 0.0 | |
| weight_decay: 0.0 | |
| precision: null | |
| requirements: duration=12h & cpu(cores=16) & cuda(mem=80G) | |
| sample_max: 0 | |
| sample_rate: 1.0 | |
| strategy: auto | |
| validation: nanobeir | |
| validation_interval: 50 | |
| validation_top_k: 100 | |