Training in progress, epoch 3, checkpoint

Files changed (8) hide show

checkpoint-2250/config.json ADDED Viewed

+{
+  "architectures": [
+    "Qwen2ForCausalLM"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 151643,
+  "eos_token_id": 151643,
+  "hidden_act": "silu",
+  "hidden_size": 1536,
+  "initializer_range": 0.02,
+  "intermediate_size": 8960,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 131072,
+  "max_window_layers": 28,
+  "model_type": "qwen2",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 28,
+  "num_key_value_heads": 2,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000.0,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.53.2",
+  "use_cache": true,
+  "use_mrope": false,
+  "use_sliding_window": false,
+  "vocab_size": 151667
+}

checkpoint-2250/generation_config.json ADDED Viewed

+{
+  "bos_token_id": 151643,
+  "eos_token_id": 151643,
+  "max_new_tokens": 2048,
+  "transformers_version": "4.53.2"
+}

checkpoint-2250/model.safetensors ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:078f5039db0ab14aa76dbdebecb1983daa787f7f1a40afe920c2936f5d0a4d61
+size 3086640776

checkpoint-2250/optimizer.pt ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:accb1db8450c1990099f1be36382f15b664c8049f2763bcd6f30c0437b9f6540
+size 6056651

checkpoint-2250/rng_state.pth ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:0ecc8d19037d27a117a2fd2ffd165ca51167a61228a3907a4352a6c28073eb8e
+size 14645

checkpoint-2250/scheduler.pt ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:31f393e55181683f501a4ca4b9d0d84036ad322847579ffb4dd26775ea41dd27
+size 1465

checkpoint-2250/trainer_state.json ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-2250/training_args.bin ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:1da98c6d17f4216728f21e20cf16cead69b93fd52b151e5750bd308b24b24e81
+size 5969