File size: 2,392 Bytes
abe04a2
 
 
 
 
 
 
 
 
 
 
 
a335c8f
abe04a2
a335c8f
abe04a2
 
 
 
a335c8f
abe04a2
a335c8f
abe04a2
 
 
 
a335c8f
abe04a2
a335c8f
abe04a2
 
 
 
a335c8f
abe04a2
a335c8f
abe04a2
 
 
 
a335c8f
abe04a2
a335c8f
abe04a2
 
 
 
a335c8f
abe04a2
a335c8f
abe04a2
 
 
 
a335c8f
abe04a2
a335c8f
abe04a2
 
 
 
a335c8f
abe04a2
a335c8f
abe04a2
 
 
 
 
a335c8f
 
 
 
 
abe04a2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a335c8f
abe04a2
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 4.0,
  "eval_steps": 500,
  "global_step": 2060,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.4854368932038835,
      "grad_norm": 0.6541855931282043,
      "learning_rate": 8.786407766990292e-05,
      "loss": 0.8179,
      "step": 250
    },
    {
      "epoch": 0.970873786407767,
      "grad_norm": 0.48072507977485657,
      "learning_rate": 7.572815533980583e-05,
      "loss": 0.4138,
      "step": 500
    },
    {
      "epoch": 1.4563106796116505,
      "grad_norm": 0.4883740544319153,
      "learning_rate": 6.359223300970875e-05,
      "loss": 0.3734,
      "step": 750
    },
    {
      "epoch": 1.941747572815534,
      "grad_norm": 0.45032238960266113,
      "learning_rate": 5.145631067961165e-05,
      "loss": 0.3606,
      "step": 1000
    },
    {
      "epoch": 2.4271844660194173,
      "grad_norm": 0.42356109619140625,
      "learning_rate": 3.9320388349514564e-05,
      "loss": 0.3521,
      "step": 1250
    },
    {
      "epoch": 2.912621359223301,
      "grad_norm": 0.4464420676231384,
      "learning_rate": 2.7184466019417475e-05,
      "loss": 0.3473,
      "step": 1500
    },
    {
      "epoch": 3.3980582524271843,
      "grad_norm": 0.45505988597869873,
      "learning_rate": 1.5048543689320387e-05,
      "loss": 0.3394,
      "step": 1750
    },
    {
      "epoch": 3.883495145631068,
      "grad_norm": 0.40663233399391174,
      "learning_rate": 2.912621359223301e-06,
      "loss": 0.3358,
      "step": 2000
    },
    {
      "epoch": 4.0,
      "step": 2060,
      "total_flos": 5.02338223728341e+17,
      "train_loss": 0.41515450986843666,
      "train_runtime": 8856.6028,
      "train_samples_per_second": 59.543,
      "train_steps_per_second": 0.233
    }
  ],
  "logging_steps": 250,
  "max_steps": 2060,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": false,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 5.02338223728341e+17,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}