Luke-Bergen committed on
Commit
389bf38
·
verified ·
1 Parent(s): a4da138

Create trainer_config.json

Browse files
Files changed (1) hide show
  1. trainer_config.json +43 -0
trainer_config.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "output_dir": "./mineral-nano-1",
3
+ "overwrite_output_dir": true,
4
+ "do_train": true,
5
+ "do_eval": true,
6
+ "evaluation_strategy": "steps",
7
+ "eval_steps": 500,
8
+ "per_device_train_batch_size": 8,
9
+ "per_device_eval_batch_size": 8,
10
+ "gradient_accumulation_steps": 4,
11
+ "learning_rate": 5e-5,
12
+ "weight_decay": 0.01,
13
+ "adam_beta1": 0.9,
14
+ "adam_beta2": 0.999,
15
+ "adam_epsilon": 1e-8,
16
+ "max_grad_norm": 1.0,
17
+ "num_train_epochs": 3,
18
+ "max_steps": -1,
19
+ "lr_scheduler_type": "cosine",
20
+ "warmup_ratio": 0.1,
21
+ "warmup_steps": 0,
22
+ "logging_dir": "./logs",
23
+ "logging_strategy": "steps",
24
+ "logging_steps": 100,
25
+ "save_strategy": "steps",
26
+ "save_steps": 1000,
27
+ "save_total_limit": 3,
28
+ "seed": 42,
29
+ "fp16": false,
30
+ "bf16": true,
31
+ "dataloader_num_workers": 4,
32
+ "load_best_model_at_end": true,
33
+ "metric_for_best_model": "eval_loss",
34
+ "greater_is_better": false,
35
+ "group_by_length": false,
36
+ "report_to": ["tensorboard"],
37
+ "ddp_find_unused_parameters": false,
38
+ "optim": "adamw_torch",
39
+ "gradient_checkpointing": true,
40
+ "gradient_checkpointing_kwargs": {
41
+ "use_reentrant": false
42
+ }
43
+ }