qingy2024 committed on
Commit
2622486
·
verified ·
1 Parent(s): 22faa38

Upload checkpoint 4918

Browse files
Files changed (6)
  1. README.md +4 -4
  2. adapter_config.json +1 -1
  3. loss.png +2 -2
  4. optimizer.pt +1 -1
  5. scheduler.pt +1 -1
  6. trainer_state.json +130 -4
README.md CHANGED
@@ -2,7 +2,7 @@
2
  base_model: Qwen/Qwen2.5-7B-Instruct
3
  library_name: peft
4
  ---
5
- # Gradience T1 7B (Step 4900 Checkpoint)
6
 
7
  > [!NOTE]
8
  > Training in progress...
@@ -15,11 +15,11 @@ library_name: peft
15
  </head>
16
  <body>
17
  <div style="width: 100%; background-color: #e0e0e0; border-radius: 25px; overflow: hidden; margin: 20px 0;">
18
- <div style="height: 30px; width: 99.63%; background-color: #44965a; text-align: center; line-height: 30px; color: white; border-radius: 25px 0 0 25px;">
19
- 99.6%
20
  </div>
21
  </div>
22
- <p style="font-family: Arial, sans-serif; font-size: 16px;">Progress: 4900 out of 4918 steps</p>
23
  </body>
24
  </html>
25
 
 
2
  base_model: Qwen/Qwen2.5-7B-Instruct
3
  library_name: peft
4
  ---
5
+ # Gradience T1 7B (Step 4918 Checkpoint)
6
 
7
  > [!NOTE]
8
  > Training in progress...
 
15
  </head>
16
  <body>
17
  <div style="width: 100%; background-color: #e0e0e0; border-radius: 25px; overflow: hidden; margin: 20px 0;">
18
+ <div style="height: 30px; width: 100.00%; background-color: #44965a; text-align: center; line-height: 30px; color: white; border-radius: 25px 0 0 25px;">
19
+ 100.0%
20
  </div>
21
  </div>
22
+ <p style="font-family: Arial, sans-serif; font-size: 16px;">Progress: 4918 out of 4918 steps</p>
23
  </body>
24
  </html>
25
 
adapter_config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "alpha_pattern": {},
3
  "auto_mapping": null,
4
- "base_model_name_or_path": "./Qwen-2.5-7B-Instruct",
5
  "bias": "none",
6
  "eva_config": null,
7
  "exclude_modules": null,
 
1
  {
2
  "alpha_pattern": {},
3
  "auto_mapping": null,
4
+ "base_model_name_or_path": "Qwen/Qwen2.5-7B-Instruct",
5
  "bias": "none",
6
  "eva_config": null,
7
  "exclude_modules": null,
loss.png CHANGED

Git LFS Details

  • SHA256: 78e4b22d4a15c87c21a718df21fe716b05046588c081dda8953d79a6e74b8838
  • Pointer size: 131 Bytes
  • Size of remote file: 110 kB

Git LFS Details

  • SHA256: a8786320aae33d434156c5d81735fa4f50f11426368ad91b7e1b862e84d7d33e
  • Pointer size: 131 Bytes
  • Size of remote file: 111 kB
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8692dc55cf936e814593059ddc130c86529b75ce648f04fffb2c51d8a817cd80
3
  size 82461044
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5111f467e847f2750566ffd2cef8bd631d8c7221a6c0019c0c1320c4118e2b98
3
  size 82461044
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cfaa254ae737802dd7cd5e65a69fbb58067ebe2f88a794ce9ee8b1c2a69498b4
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:506cf36063a4621b49ee486a38867162e37a2f0bf6058c24c0b4f12fa1181aa8
3
  size 1064
trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 1.9920715592600122,
6
  "eval_steps": 500,
7
- "global_step": 4900,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -34308,6 +34308,132 @@
34308
  "learning_rate": 7.734581721962141e-07,
34309
  "loss": 0.937,
34310
  "step": 4900
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34311
  }
34312
  ],
34313
  "logging_steps": 1,
@@ -34322,12 +34448,12 @@
34322
  "should_evaluate": false,
34323
  "should_log": false,
34324
  "should_save": true,
34325
- "should_training_stop": false
34326
  },
34327
  "attributes": {}
34328
  }
34329
  },
34330
- "total_flos": 1.5662724108266447e+19,
34331
  "train_batch_size": 16,
34332
  "trial_name": null,
34333
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 1.999390119943078,
6
  "eval_steps": 500,
7
+ "global_step": 4918,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
34308
  "learning_rate": 7.734581721962141e-07,
34309
  "loss": 0.937,
34310
  "step": 4900
34311
+ },
34312
+ {
34313
+ "epoch": 1.9924781459646268,
34314
+ "grad_norm": 0.10515905171632767,
34315
+ "learning_rate": 7.327498473437818e-07,
34316
+ "loss": 0.9686,
34317
+ "step": 4901
34318
+ },
34319
+ {
34320
+ "epoch": 1.9928847326692418,
34321
+ "grad_norm": 0.1109880730509758,
34322
+ "learning_rate": 6.920415224913496e-07,
34323
+ "loss": 0.9375,
34324
+ "step": 4902
34325
+ },
34326
+ {
34327
+ "epoch": 1.9932913193738564,
34328
+ "grad_norm": 0.10059867799282074,
34329
+ "learning_rate": 6.513331976389172e-07,
34330
+ "loss": 0.9148,
34331
+ "step": 4903
34332
+ },
34333
+ {
34334
+ "epoch": 1.9936979060784712,
34335
+ "grad_norm": 0.1153227686882019,
34336
+ "learning_rate": 6.106248727864849e-07,
34337
+ "loss": 1.065,
34338
+ "step": 4904
34339
+ },
34340
+ {
34341
+ "epoch": 1.994104492783086,
34342
+ "grad_norm": 0.10817611962556839,
34343
+ "learning_rate": 5.699165479340526e-07,
34344
+ "loss": 0.9162,
34345
+ "step": 4905
34346
+ },
34347
+ {
34348
+ "epoch": 1.9945110794877008,
34349
+ "grad_norm": 0.09951157122850418,
34350
+ "learning_rate": 5.292082230816202e-07,
34351
+ "loss": 0.885,
34352
+ "step": 4906
34353
+ },
34354
+ {
34355
+ "epoch": 1.9949176661923156,
34356
+ "grad_norm": 0.1026596650481224,
34357
+ "learning_rate": 4.884998982291879e-07,
34358
+ "loss": 0.9054,
34359
+ "step": 4907
34360
+ },
34361
+ {
34362
+ "epoch": 1.9953242528969302,
34363
+ "grad_norm": 0.10928881913423538,
34364
+ "learning_rate": 4.4779157337675555e-07,
34365
+ "loss": 0.9206,
34366
+ "step": 4908
34367
+ },
34368
+ {
34369
+ "epoch": 1.9957308396015452,
34370
+ "grad_norm": 0.1039741113781929,
34371
+ "learning_rate": 4.070832485243233e-07,
34372
+ "loss": 0.9762,
34373
+ "step": 4909
34374
+ },
34375
+ {
34376
+ "epoch": 1.9961374263061598,
34377
+ "grad_norm": 0.10720765590667725,
34378
+ "learning_rate": 3.663749236718909e-07,
34379
+ "loss": 0.9376,
34380
+ "step": 4910
34381
+ },
34382
+ {
34383
+ "epoch": 1.9965440130107746,
34384
+ "grad_norm": 0.11087562888860703,
34385
+ "learning_rate": 3.256665988194586e-07,
34386
+ "loss": 1.0135,
34387
+ "step": 4911
34388
+ },
34389
+ {
34390
+ "epoch": 1.9969505997153894,
34391
+ "grad_norm": 0.11333035677671432,
34392
+ "learning_rate": 2.849582739670263e-07,
34393
+ "loss": 0.9378,
34394
+ "step": 4912
34395
+ },
34396
+ {
34397
+ "epoch": 1.997357186420004,
34398
+ "grad_norm": 0.10567180067300797,
34399
+ "learning_rate": 2.4424994911459393e-07,
34400
+ "loss": 0.8727,
34401
+ "step": 4913
34402
+ },
34403
+ {
34404
+ "epoch": 1.997763773124619,
34405
+ "grad_norm": 0.09908761829137802,
34406
+ "learning_rate": 2.0354162426216164e-07,
34407
+ "loss": 0.8175,
34408
+ "step": 4914
34409
+ },
34410
+ {
34411
+ "epoch": 1.9981703598292335,
34412
+ "grad_norm": 0.1148877665400505,
34413
+ "learning_rate": 1.628332994097293e-07,
34414
+ "loss": 0.9689,
34415
+ "step": 4915
34416
+ },
34417
+ {
34418
+ "epoch": 1.9985769465338483,
34419
+ "grad_norm": 0.1073300689458847,
34420
+ "learning_rate": 1.2212497455729696e-07,
34421
+ "loss": 0.9064,
34422
+ "step": 4916
34423
+ },
34424
+ {
34425
+ "epoch": 1.9989835332384631,
34426
+ "grad_norm": 0.10753702372312546,
34427
+ "learning_rate": 8.141664970486465e-08,
34428
+ "loss": 0.9366,
34429
+ "step": 4917
34430
+ },
34431
+ {
34432
+ "epoch": 1.999390119943078,
34433
+ "grad_norm": 0.10542717576026917,
34434
+ "learning_rate": 4.0708324852432326e-08,
34435
+ "loss": 0.8963,
34436
+ "step": 4918
34437
  }
34438
  ],
34439
  "logging_steps": 1,
 
34448
  "should_evaluate": false,
34449
  "should_log": false,
34450
  "should_save": true,
34451
+ "should_training_stop": true
34452
  },
34453
  "attributes": {}
34454
  }
34455
  },
34456
+ "total_flos": 1.57185946392996e+19,
34457
  "train_batch_size": 16,
34458
  "trial_name": null,
34459
  "trial_params": null