diff --git a/.gitattributes b/.gitattributes index c8881cc1b7ac2c1b0f51aabf626a8420c7073196..3a854d831262fe2011fd6cb44a48bb4e6e78c1cf 100644 --- a/.gitattributes +++ b/.gitattributes @@ -52,3 +52,11 @@ gemma-2b-dpo/checkpoint-540/tokenizer.json filter=lfs diff=lfs merge=lfs -text gemma-2b-distilled/checkpoint-150/tokenizer.json filter=lfs diff=lfs merge=lfs -text gemma-2b-distilled/checkpoint-225/tokenizer.json filter=lfs diff=lfs merge=lfs -text gemma-2b-distilled/checkpoint-75/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-9b-dpo/checkpoint-100/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-9b-dpo/checkpoint-150/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-9b-dpo/checkpoint-200/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-9b-dpo/checkpoint-250/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-9b-dpo/checkpoint-300/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-9b-dpo/checkpoint-350/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-9b-dpo/checkpoint-351/tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemma-9b-dpo/checkpoint-50/tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/gemma-9b-dpo/README.md b/gemma-9b-dpo/README.md new file mode 100644 index 0000000000000000000000000000000000000000..94391496b001eae7bee1cedcbc7b9f6c8a48f7dc --- /dev/null +++ b/gemma-9b-dpo/README.md @@ -0,0 +1,72 @@ +--- +base_model: google/gemma-2-9b-it +library_name: peft +model_name: gemma-9b-dpo-medgemma-450 +tags: +- base_model:adapter:google/gemma-2-9b-it +- dpo +- lora +- transformers +- trl +licence: license +pipeline_tag: text-generation +--- + +# Model Card for gemma-9b-dpo-medgemma-450 + +This model is a fine-tuned version of [google/gemma-2-9b-it](https://huggingface.co/google/gemma-2-9b-it). +It has been trained using [TRL](https://github.com/huggingface/trl). + +## Quick start + +```python +from transformers import pipeline + +question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?" +generator = pipeline("text-generation", model="None", device="cuda") +output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0] +print(output["generated_text"]) +``` + +## Training procedure + + + + +This model was trained with DPO, a method introduced in [Direct Preference Optimization: Your Language Model is Secretly a Reward Model](https://huggingface.co/papers/2305.18290). + +### Framework versions + +- PEFT 0.18.1 +- TRL: 0.28.0 +- Transformers: 5.2.0 +- Pytorch: 2.10.0 +- Datasets: 4.5.0 +- Tokenizers: 0.22.2 + +## Citations + +Cite DPO as: + +```bibtex +@inproceedings{rafailov2023direct, + title = {{Direct Preference Optimization: Your Language Model is Secretly a Reward Model}}, + author = {Rafael Rafailov and Archit Sharma and Eric Mitchell and Christopher D. Manning and Stefano Ermon and Chelsea Finn}, + year = 2023, + booktitle = {Advances in Neural Information Processing Systems 36: Annual Conference on Neural Information Processing Systems 2023, NeurIPS 2023, New Orleans, LA, USA, December 10 - 16, 2023}, + url = {http://papers.nips.cc/paper_files/paper/2023/hash/a85b405ed65c6477a4fe8302b5e06ce7-Abstract-Conference.html}, + editor = {Alice Oh and Tristan Naumann and Amir Globerson and Kate Saenko and Moritz Hardt and Sergey Levine}, +} +``` + +Cite TRL as: + +```bibtex +@software{vonwerra2020trl, + title = {{TRL: Transformers Reinforcement Learning}}, + author = {von Werra, Leandro and Belkada, Younes and Tunstall, Lewis and Beeching, Edward and Thrush, Tristan and Lambert, Nathan and Huang, Shengyi and Rasul, Kashif and Gallouédec, Quentin}, + license = {Apache-2.0}, + url = {https://github.com/huggingface/trl}, + year = {2020} +} +``` \ No newline at end of file diff --git a/gemma-9b-dpo/adapter_config.json b/gemma-9b-dpo/adapter_config.json index b139cbdfcedf0432ed3eb4a16053b7fd3f454ec9..c80c202b52eb25538c92a8e095abe785cdc6f749 100644 --- a/gemma-9b-dpo/adapter_config.json +++ b/gemma-9b-dpo/adapter_config.json @@ -29,13 +29,13 @@ "rank_pattern": {}, "revision": null, "target_modules": [ - "gate_proj", - "q_proj", + "down_proj", "v_proj", - "up_proj", - "o_proj", "k_proj", - "down_proj" + "up_proj", + "q_proj", + "gate_proj", + "o_proj" ], "target_parameters": null, "task_type": "CAUSAL_LM", diff --git a/gemma-9b-dpo/adapter_model.safetensors b/gemma-9b-dpo/adapter_model.safetensors index 21b3bf3070283c50905f52bc31e60d3d4662a041..3ff7314384e504e61384037c1a6cdad1b96b2e49 100644 --- a/gemma-9b-dpo/adapter_model.safetensors +++ b/gemma-9b-dpo/adapter_model.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b999ce33083a55536a152a5a3c149746e107f5d28f96abd7f5750c9bf520bed6 +oid sha256:4ea0ff88e00dfb29580d0fa61936b45ac7ee6e5f886f57fd984f2f3854211d5e size 216151256 diff --git a/gemma-9b-dpo/checkpoint-100/README.md b/gemma-9b-dpo/checkpoint-100/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a367db0a4fe66cceaf15eef8697f5b4480f48fd3 --- /dev/null +++ b/gemma-9b-dpo/checkpoint-100/README.md @@ -0,0 +1,209 @@ +--- +base_model: google/gemma-2-9b-it +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:google/gemma-2-9b-it +- dpo +- lora +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/gemma-9b-dpo/checkpoint-100/adapter_config.json b/gemma-9b-dpo/checkpoint-100/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c80c202b52eb25538c92a8e095abe785cdc6f749 --- /dev/null +++ b/gemma-9b-dpo/checkpoint-100/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2-9b-it", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj", + "v_proj", + "k_proj", + "up_proj", + "q_proj", + "gate_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-9b-dpo/checkpoint-100/adapter_model.safetensors b/gemma-9b-dpo/checkpoint-100/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d03c73153df81c1c1d3ce90d251a311b33c5df19 --- /dev/null +++ b/gemma-9b-dpo/checkpoint-100/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7c9f0c8577a228cc7034adc448f4596f9de36d2fa777c37a8d89970d6026ef2 +size 216151256 diff --git a/gemma-9b-dpo/checkpoint-100/chat_template.jinja b/gemma-9b-dpo/checkpoint-100/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..923ec253c8dbefbb41cf084db7251df41d000f6d --- /dev/null +++ b/gemma-9b-dpo/checkpoint-100/chat_template.jinja @@ -0,0 +1,4 @@ +{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + ' +' + message['content'] | trim + ' +' }}{% endfor %}{% if add_generation_prompt %}{{'model +'}}{% endif %} \ No newline at end of file diff --git a/gemma-9b-dpo/checkpoint-100/optimizer.pt b/gemma-9b-dpo/checkpoint-100/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..6c59b1109f2dcab91fb48d7b5f555a1c613f9c87 --- /dev/null +++ b/gemma-9b-dpo/checkpoint-100/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf0ce732e832a1274480b5520b72d3661260561069eaf76121fe58f232ed9d84 +size 110425877 diff --git a/gemma-9b-dpo/checkpoint-100/rng_state.pth b/gemma-9b-dpo/checkpoint-100/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..b059ca2a76606ab54a2502b2b3c9150c6b08b6eb --- /dev/null +++ b/gemma-9b-dpo/checkpoint-100/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:250560ab3d528161ab3659b120def6e4a9ab4b457e3399603bbcfa40db3efc90 +size 14645 diff --git a/gemma-9b-dpo/checkpoint-100/scheduler.pt b/gemma-9b-dpo/checkpoint-100/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a79b06a8f35dd669b8cd9fe19ad1bf0e439ef715 --- /dev/null +++ b/gemma-9b-dpo/checkpoint-100/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67328bf66accf20808215e57905f6c9930b4b18ece0e4f4e03baee7eb2fb81fa +size 1465 diff --git a/gemma-9b-dpo/checkpoint-100/tokenizer.json b/gemma-9b-dpo/checkpoint-100/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6523cc0616b64c563af913a417dfa7eb01549a2c --- /dev/null +++ b/gemma-9b-dpo/checkpoint-100/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:394ace002a144ac6ad5486387502f2d36f70c087310c3d907857240c76fcb36e +size 34362748 diff --git a/gemma-9b-dpo/checkpoint-100/tokenizer_config.json b/gemma-9b-dpo/checkpoint-100/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..eabc4ed8ab34751069a1970d61e616ce93e53880 --- /dev/null +++ b/gemma-9b-dpo/checkpoint-100/tokenizer_config.json @@ -0,0 +1,19 @@ +{ + "backend": "tokenizers", + "bos_token": "", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "extra_special_tokens": [ + "", + "" + ], + "is_local": false, + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-9b-dpo/checkpoint-100/trainer_state.json b/gemma-9b-dpo/checkpoint-100/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9aefefaaf4da43cd4f63c6b2612aa8be53602dff --- /dev/null +++ b/gemma-9b-dpo/checkpoint-100/trainer_state.json @@ -0,0 +1,334 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.8602150537634409, + "eval_steps": 500, + "global_step": 100, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.043010752688172046, + "grad_norm": 1.6562875509262085, + "learning_rate": 3.333333333333333e-07, + "logits/chosen": -5.94815731048584, + "logits/rejected": -5.856410503387451, + "logps/chosen": -302.07794189453125, + "logps/rejected": -283.5802001953125, + "loss": 0.6852486610412598, + "rewards/accuracies": 0.375, + "rewards/chosen": 0.01349079143255949, + "rewards/margins": 0.017757166177034378, + "rewards/rejected": -0.00426637614145875, + "step": 5 + }, + { + "epoch": 0.08602150537634409, + "grad_norm": 1.7723957300186157, + "learning_rate": 7.5e-07, + "logits/chosen": -5.585428714752197, + "logits/rejected": -5.577895641326904, + "logps/chosen": -362.89239501953125, + "logps/rejected": -280.8010559082031, + "loss": 0.6844330310821534, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.018745478242635727, + "rewards/margins": 0.018739622086286545, + "rewards/rejected": 5.85438692723983e-06, + "step": 10 + }, + { + "epoch": 0.12903225806451613, + "grad_norm": 1.6630758047103882, + "learning_rate": 1.1666666666666668e-06, + "logits/chosen": -5.916059494018555, + "logits/rejected": -5.797668933868408, + "logps/chosen": -307.4602966308594, + "logps/rejected": -316.79803466796875, + "loss": 0.7070020198822021, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.008706416003406048, + "rewards/margins": -0.02138950116932392, + "rewards/rejected": 0.012683087959885597, + "step": 15 + }, + { + "epoch": 0.17204301075268819, + "grad_norm": 5.225494861602783, + "learning_rate": 1.5833333333333333e-06, + "logits/chosen": -5.818185329437256, + "logits/rejected": -5.638014793395996, + "logps/chosen": -329.3303527832031, + "logps/rejected": -292.2645568847656, + "loss": 0.6929943561553955, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": -0.044999174773693085, + "rewards/margins": 0.00216915225610137, + "rewards/rejected": -0.04716832935810089, + "step": 20 + }, + { + "epoch": 0.21505376344086022, + "grad_norm": 2.5229244232177734, + "learning_rate": 2e-06, + "logits/chosen": -5.8189592361450195, + "logits/rejected": -5.646462917327881, + "logps/chosen": -311.9922180175781, + "logps/rejected": -297.0098571777344, + "loss": 0.7002316951751709, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": 0.0544702522456646, + "rewards/margins": -0.012132339179515839, + "rewards/rejected": 0.06660258769989014, + "step": 25 + }, + { + "epoch": 0.25806451612903225, + "grad_norm": 1.6524324417114258, + "learning_rate": 2.4166666666666667e-06, + "logits/chosen": -5.696784496307373, + "logits/rejected": -5.6943678855896, + "logps/chosen": -350.2938537597656, + "logps/rejected": -322.43182373046875, + "loss": 0.699475908279419, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.0020792910363525152, + "rewards/margins": 0.02348965033888817, + "rewards/rejected": -0.025568943470716476, + "step": 30 + }, + { + "epoch": 0.3010752688172043, + "grad_norm": 9.075493812561035, + "learning_rate": 2.8333333333333335e-06, + "logits/chosen": -5.407717704772949, + "logits/rejected": -5.5642805099487305, + "logps/chosen": -391.3837585449219, + "logps/rejected": -333.9232482910156, + "loss": 0.6926415920257568, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": -0.026233959943056107, + "rewards/margins": 0.00582819152623415, + "rewards/rejected": -0.03206215053796768, + "step": 35 + }, + { + "epoch": 0.34408602150537637, + "grad_norm": 2.6282100677490234, + "learning_rate": 2.9714285714285716e-06, + "logits/chosen": -5.649778366088867, + "logits/rejected": -5.512002944946289, + "logps/chosen": -338.31048583984375, + "logps/rejected": -279.06536865234375, + "loss": 0.6739111423492432, + "rewards/accuracies": 0.625, + "rewards/chosen": 0.04578382521867752, + "rewards/margins": 0.042618148028850555, + "rewards/rejected": 0.0031656839419156313, + "step": 40 + }, + { + "epoch": 0.3870967741935484, + "grad_norm": 16.96689796447754, + "learning_rate": 2.923809523809524e-06, + "logits/chosen": -5.518254280090332, + "logits/rejected": -5.577255725860596, + "logps/chosen": -374.29449462890625, + "logps/rejected": -366.4861145019531, + "loss": 0.7162120819091797, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": 0.03416462987661362, + "rewards/margins": 0.0036566159687936306, + "rewards/rejected": 0.030508000403642654, + "step": 45 + }, + { + "epoch": 0.43010752688172044, + "grad_norm": 1.447816252708435, + "learning_rate": 2.8761904761904764e-06, + "logits/chosen": -5.831571102142334, + "logits/rejected": -5.802765846252441, + "logps/chosen": -316.0965881347656, + "logps/rejected": -269.0494384765625, + "loss": 0.6838803291320801, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.1265859305858612, + "rewards/margins": 0.026753634214401245, + "rewards/rejected": -0.15333956480026245, + "step": 50 + }, + { + "epoch": 0.4731182795698925, + "grad_norm": 2.174459457397461, + "learning_rate": 2.8285714285714288e-06, + "logits/chosen": -5.777364253997803, + "logits/rejected": -5.834509372711182, + "logps/chosen": -316.31390380859375, + "logps/rejected": -282.5376892089844, + "loss": 0.6832016944885254, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.01756533980369568, + "rewards/margins": 0.02713041380047798, + "rewards/rejected": -0.04469575732946396, + "step": 55 + }, + { + "epoch": 0.5161290322580645, + "grad_norm": 2.2876009941101074, + "learning_rate": 2.780952380952381e-06, + "logits/chosen": -5.92340087890625, + "logits/rejected": -5.846226692199707, + "logps/chosen": -235.49667358398438, + "logps/rejected": -267.7764892578125, + "loss": 0.6656109809875488, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.021378064528107643, + "rewards/margins": 0.06001085042953491, + "rewards/rejected": -0.038632798939943314, + "step": 60 + }, + { + "epoch": 0.5591397849462365, + "grad_norm": 3.45564603805542, + "learning_rate": 2.733333333333333e-06, + "logits/chosen": -5.650258541107178, + "logits/rejected": -5.6721577644348145, + "logps/chosen": -381.40655517578125, + "logps/rejected": -311.68865966796875, + "loss": 0.7090614795684814, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": 0.1167660504579544, + "rewards/margins": -0.0002812861348502338, + "rewards/rejected": 0.11704733222723007, + "step": 65 + }, + { + "epoch": 0.6021505376344086, + "grad_norm": 4.502689838409424, + "learning_rate": 2.685714285714286e-06, + "logits/chosen": -5.727735996246338, + "logits/rejected": -5.644078254699707, + "logps/chosen": -362.89886474609375, + "logps/rejected": -314.7126770019531, + "loss": 0.6596640110015869, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.07285688072443008, + "rewards/margins": 0.07270809262990952, + "rewards/rejected": 0.00014879256195854396, + "step": 70 + }, + { + "epoch": 0.6451612903225806, + "grad_norm": 2.2681005001068115, + "learning_rate": 2.638095238095238e-06, + "logits/chosen": -5.528594017028809, + "logits/rejected": -5.29849910736084, + "logps/chosen": -353.0726013183594, + "logps/rejected": -331.8568115234375, + "loss": 0.7387234687805175, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": 0.1685911864042282, + "rewards/margins": -0.04706493765115738, + "rewards/rejected": 0.2156561315059662, + "step": 75 + }, + { + "epoch": 0.6881720430107527, + "grad_norm": 3.0135886669158936, + "learning_rate": 2.5904761904761907e-06, + "logits/chosen": -5.8677144050598145, + "logits/rejected": -5.5384016036987305, + "logps/chosen": -326.17724609375, + "logps/rejected": -285.8974304199219, + "loss": 0.6047093391418457, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 0.19864055514335632, + "rewards/margins": 0.19678126275539398, + "rewards/rejected": 0.0018593042623251677, + "step": 80 + }, + { + "epoch": 0.7311827956989247, + "grad_norm": 1.7684988975524902, + "learning_rate": 2.5428571428571427e-06, + "logits/chosen": -5.413943290710449, + "logits/rejected": -5.427316665649414, + "logps/chosen": -372.1170654296875, + "logps/rejected": -312.30279541015625, + "loss": 0.651799201965332, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": 0.179380863904953, + "rewards/margins": 0.09846550226211548, + "rewards/rejected": 0.08091535419225693, + "step": 85 + }, + { + "epoch": 0.7741935483870968, + "grad_norm": 2.4072341918945312, + "learning_rate": 2.4952380952380955e-06, + "logits/chosen": -5.4685492515563965, + "logits/rejected": -5.51275634765625, + "logps/chosen": -345.37188720703125, + "logps/rejected": -273.87261962890625, + "loss": 0.6362700462341309, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.33122482895851135, + "rewards/margins": 0.1338074505329132, + "rewards/rejected": 0.19741736352443695, + "step": 90 + }, + { + "epoch": 0.8172043010752689, + "grad_norm": 1.745698094367981, + "learning_rate": 2.4476190476190475e-06, + "logits/chosen": -5.6481757164001465, + "logits/rejected": -5.4260029792785645, + "logps/chosen": -337.11767578125, + "logps/rejected": -263.3458251953125, + "loss": 0.6222721099853515, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 0.25297099351882935, + "rewards/margins": 0.1701255589723587, + "rewards/rejected": 0.08284540474414825, + "step": 95 + }, + { + "epoch": 0.8602150537634409, + "grad_norm": 1.4592756032943726, + "learning_rate": 2.4000000000000003e-06, + "logits/chosen": -5.402789115905762, + "logits/rejected": -5.280846118927002, + "logps/chosen": -304.55316162109375, + "logps/rejected": -314.95306396484375, + "loss": 0.6404934883117676, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.29103565216064453, + "rewards/margins": 0.12328717857599258, + "rewards/rejected": 0.16774848103523254, + "step": 100 + } + ], + "logging_steps": 5, + "max_steps": 351, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 50, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-9b-dpo/checkpoint-100/training_args.bin b/gemma-9b-dpo/checkpoint-100/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3dd4e898c665a87974fee402a2f65954466af4f7 --- /dev/null +++ b/gemma-9b-dpo/checkpoint-100/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74ee585106626f1196ffc9833586c5c11146fc384e5b33ed522cf45bab148032 +size 6097 diff --git a/gemma-9b-dpo/checkpoint-150/README.md b/gemma-9b-dpo/checkpoint-150/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a367db0a4fe66cceaf15eef8697f5b4480f48fd3 --- /dev/null +++ b/gemma-9b-dpo/checkpoint-150/README.md @@ -0,0 +1,209 @@ +--- +base_model: google/gemma-2-9b-it +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:google/gemma-2-9b-it +- dpo +- lora +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/gemma-9b-dpo/checkpoint-150/adapter_config.json b/gemma-9b-dpo/checkpoint-150/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c80c202b52eb25538c92a8e095abe785cdc6f749 --- /dev/null +++ b/gemma-9b-dpo/checkpoint-150/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2-9b-it", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj", + "v_proj", + "k_proj", + "up_proj", + "q_proj", + "gate_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-9b-dpo/checkpoint-150/adapter_model.safetensors b/gemma-9b-dpo/checkpoint-150/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3c86e2ad7dcfbceaa321861c9935aad6642a1010 --- /dev/null +++ b/gemma-9b-dpo/checkpoint-150/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fb4b6a23e87c3afa5d1d7677ba18415494ce40fd39710929e6f25cd9d556c70 +size 216151256 diff --git a/gemma-9b-dpo/checkpoint-150/chat_template.jinja b/gemma-9b-dpo/checkpoint-150/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..923ec253c8dbefbb41cf084db7251df41d000f6d --- /dev/null +++ b/gemma-9b-dpo/checkpoint-150/chat_template.jinja @@ -0,0 +1,4 @@ +{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + ' +' + message['content'] | trim + ' +' }}{% endfor %}{% if add_generation_prompt %}{{'model +'}}{% endif %} \ No newline at end of file diff --git a/gemma-9b-dpo/checkpoint-150/optimizer.pt b/gemma-9b-dpo/checkpoint-150/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..6b5b8104e6349357a0acfa112f1f0f76f74b923d --- /dev/null +++ b/gemma-9b-dpo/checkpoint-150/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:650dffa633e77bcfdec0baf6affbd38388fc15c92b1de262b46fc28615ccf125 +size 110425877 diff --git a/gemma-9b-dpo/checkpoint-150/rng_state.pth b/gemma-9b-dpo/checkpoint-150/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..dbebd9e6d61cfcda2eea44484e4b07f4d7cba3fe --- /dev/null +++ b/gemma-9b-dpo/checkpoint-150/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de69a2834426ff9ef8199d077e00892579278af31d8969d77f98235b5cfc010a +size 14645 diff --git a/gemma-9b-dpo/checkpoint-150/scheduler.pt b/gemma-9b-dpo/checkpoint-150/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..ed29ec25a6ab431f826294a0927697f8a564d1e9 --- /dev/null +++ b/gemma-9b-dpo/checkpoint-150/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bddf86a69e8937c848fd7d73834147cc9d2b9b28c3b046c3f4e213af6efdce30 +size 1465 diff --git a/gemma-9b-dpo/checkpoint-150/tokenizer.json b/gemma-9b-dpo/checkpoint-150/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6523cc0616b64c563af913a417dfa7eb01549a2c --- /dev/null +++ b/gemma-9b-dpo/checkpoint-150/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:394ace002a144ac6ad5486387502f2d36f70c087310c3d907857240c76fcb36e +size 34362748 diff --git a/gemma-9b-dpo/checkpoint-150/tokenizer_config.json b/gemma-9b-dpo/checkpoint-150/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..eabc4ed8ab34751069a1970d61e616ce93e53880 --- /dev/null +++ b/gemma-9b-dpo/checkpoint-150/tokenizer_config.json @@ -0,0 +1,19 @@ +{ + "backend": "tokenizers", + "bos_token": "", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "extra_special_tokens": [ + "", + "" + ], + "is_local": false, + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-9b-dpo/checkpoint-150/trainer_state.json b/gemma-9b-dpo/checkpoint-150/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..7417d607b6f8576599c090cd9a4b037ad8c21d03 --- /dev/null +++ b/gemma-9b-dpo/checkpoint-150/trainer_state.json @@ -0,0 +1,484 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.2838709677419355, + "eval_steps": 500, + "global_step": 150, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.043010752688172046, + "grad_norm": 1.6562875509262085, + "learning_rate": 3.333333333333333e-07, + "logits/chosen": -5.94815731048584, + "logits/rejected": -5.856410503387451, + "logps/chosen": -302.07794189453125, + "logps/rejected": -283.5802001953125, + "loss": 0.6852486610412598, + "rewards/accuracies": 0.375, + "rewards/chosen": 0.01349079143255949, + "rewards/margins": 0.017757166177034378, + "rewards/rejected": -0.00426637614145875, + "step": 5 + }, + { + "epoch": 0.08602150537634409, + "grad_norm": 1.7723957300186157, + "learning_rate": 7.5e-07, + "logits/chosen": -5.585428714752197, + "logits/rejected": -5.577895641326904, + "logps/chosen": -362.89239501953125, + "logps/rejected": -280.8010559082031, + "loss": 0.6844330310821534, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.018745478242635727, + "rewards/margins": 0.018739622086286545, + "rewards/rejected": 5.85438692723983e-06, + "step": 10 + }, + { + "epoch": 0.12903225806451613, + "grad_norm": 1.6630758047103882, + "learning_rate": 1.1666666666666668e-06, + "logits/chosen": -5.916059494018555, + "logits/rejected": -5.797668933868408, + "logps/chosen": -307.4602966308594, + "logps/rejected": -316.79803466796875, + "loss": 0.7070020198822021, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.008706416003406048, + "rewards/margins": -0.02138950116932392, + "rewards/rejected": 0.012683087959885597, + "step": 15 + }, + { + "epoch": 0.17204301075268819, + "grad_norm": 5.225494861602783, + "learning_rate": 1.5833333333333333e-06, + "logits/chosen": -5.818185329437256, + "logits/rejected": -5.638014793395996, + "logps/chosen": -329.3303527832031, + "logps/rejected": -292.2645568847656, + "loss": 0.6929943561553955, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": -0.044999174773693085, + "rewards/margins": 0.00216915225610137, + "rewards/rejected": -0.04716832935810089, + "step": 20 + }, + { + "epoch": 0.21505376344086022, + "grad_norm": 2.5229244232177734, + "learning_rate": 2e-06, + "logits/chosen": -5.8189592361450195, + "logits/rejected": -5.646462917327881, + "logps/chosen": -311.9922180175781, + "logps/rejected": -297.0098571777344, + "loss": 0.7002316951751709, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": 0.0544702522456646, + "rewards/margins": -0.012132339179515839, + "rewards/rejected": 0.06660258769989014, + "step": 25 + }, + { + "epoch": 0.25806451612903225, + "grad_norm": 1.6524324417114258, + "learning_rate": 2.4166666666666667e-06, + "logits/chosen": -5.696784496307373, + "logits/rejected": -5.6943678855896, + "logps/chosen": -350.2938537597656, + "logps/rejected": -322.43182373046875, + "loss": 0.699475908279419, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.0020792910363525152, + "rewards/margins": 0.02348965033888817, + "rewards/rejected": -0.025568943470716476, + "step": 30 + }, + { + "epoch": 0.3010752688172043, + "grad_norm": 9.075493812561035, + "learning_rate": 2.8333333333333335e-06, + "logits/chosen": -5.407717704772949, + "logits/rejected": -5.5642805099487305, + "logps/chosen": -391.3837585449219, + "logps/rejected": -333.9232482910156, + "loss": 0.6926415920257568, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": -0.026233959943056107, + "rewards/margins": 0.00582819152623415, + "rewards/rejected": -0.03206215053796768, + "step": 35 + }, + { + "epoch": 0.34408602150537637, + "grad_norm": 2.6282100677490234, + "learning_rate": 2.9714285714285716e-06, + "logits/chosen": -5.649778366088867, + "logits/rejected": -5.512002944946289, + "logps/chosen": -338.31048583984375, + "logps/rejected": -279.06536865234375, + "loss": 0.6739111423492432, + "rewards/accuracies": 0.625, + "rewards/chosen": 0.04578382521867752, + "rewards/margins": 0.042618148028850555, + "rewards/rejected": 0.0031656839419156313, + "step": 40 + }, + { + "epoch": 0.3870967741935484, + "grad_norm": 16.96689796447754, + "learning_rate": 2.923809523809524e-06, + "logits/chosen": -5.518254280090332, + "logits/rejected": -5.577255725860596, + "logps/chosen": -374.29449462890625, + "logps/rejected": -366.4861145019531, + "loss": 0.7162120819091797, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": 0.03416462987661362, + "rewards/margins": 0.0036566159687936306, + "rewards/rejected": 0.030508000403642654, + "step": 45 + }, + { + "epoch": 0.43010752688172044, + "grad_norm": 1.447816252708435, + "learning_rate": 2.8761904761904764e-06, + "logits/chosen": -5.831571102142334, + "logits/rejected": -5.802765846252441, + "logps/chosen": -316.0965881347656, + "logps/rejected": -269.0494384765625, + "loss": 0.6838803291320801, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.1265859305858612, + "rewards/margins": 0.026753634214401245, + "rewards/rejected": -0.15333956480026245, + "step": 50 + }, + { + "epoch": 0.4731182795698925, + "grad_norm": 2.174459457397461, + "learning_rate": 2.8285714285714288e-06, + "logits/chosen": -5.777364253997803, + "logits/rejected": -5.834509372711182, + "logps/chosen": -316.31390380859375, + "logps/rejected": -282.5376892089844, + "loss": 0.6832016944885254, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.01756533980369568, + "rewards/margins": 0.02713041380047798, + "rewards/rejected": -0.04469575732946396, + "step": 55 + }, + { + "epoch": 0.5161290322580645, + "grad_norm": 2.2876009941101074, + "learning_rate": 2.780952380952381e-06, + "logits/chosen": -5.92340087890625, + "logits/rejected": -5.846226692199707, + "logps/chosen": -235.49667358398438, + "logps/rejected": -267.7764892578125, + "loss": 0.6656109809875488, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.021378064528107643, + "rewards/margins": 0.06001085042953491, + "rewards/rejected": -0.038632798939943314, + "step": 60 + }, + { + "epoch": 0.5591397849462365, + "grad_norm": 3.45564603805542, + "learning_rate": 2.733333333333333e-06, + "logits/chosen": -5.650258541107178, + "logits/rejected": -5.6721577644348145, + "logps/chosen": -381.40655517578125, + "logps/rejected": -311.68865966796875, + "loss": 0.7090614795684814, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": 0.1167660504579544, + "rewards/margins": -0.0002812861348502338, + "rewards/rejected": 0.11704733222723007, + "step": 65 + }, + { + "epoch": 0.6021505376344086, + "grad_norm": 4.502689838409424, + "learning_rate": 2.685714285714286e-06, + "logits/chosen": -5.727735996246338, + "logits/rejected": -5.644078254699707, + "logps/chosen": -362.89886474609375, + "logps/rejected": -314.7126770019531, + "loss": 0.6596640110015869, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.07285688072443008, + "rewards/margins": 0.07270809262990952, + "rewards/rejected": 0.00014879256195854396, + "step": 70 + }, + { + "epoch": 0.6451612903225806, + "grad_norm": 2.2681005001068115, + "learning_rate": 2.638095238095238e-06, + "logits/chosen": -5.528594017028809, + "logits/rejected": -5.29849910736084, + "logps/chosen": -353.0726013183594, + "logps/rejected": -331.8568115234375, + "loss": 0.7387234687805175, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": 0.1685911864042282, + "rewards/margins": -0.04706493765115738, + "rewards/rejected": 0.2156561315059662, + "step": 75 + }, + { + "epoch": 0.6881720430107527, + "grad_norm": 3.0135886669158936, + "learning_rate": 2.5904761904761907e-06, + "logits/chosen": -5.8677144050598145, + "logits/rejected": -5.5384016036987305, + "logps/chosen": -326.17724609375, + "logps/rejected": -285.8974304199219, + "loss": 0.6047093391418457, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 0.19864055514335632, + "rewards/margins": 0.19678126275539398, + "rewards/rejected": 0.0018593042623251677, + "step": 80 + }, + { + "epoch": 0.7311827956989247, + "grad_norm": 1.7684988975524902, + "learning_rate": 2.5428571428571427e-06, + "logits/chosen": -5.413943290710449, + "logits/rejected": -5.427316665649414, + "logps/chosen": -372.1170654296875, + "logps/rejected": -312.30279541015625, + "loss": 0.651799201965332, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": 0.179380863904953, + "rewards/margins": 0.09846550226211548, + "rewards/rejected": 0.08091535419225693, + "step": 85 + }, + { + "epoch": 0.7741935483870968, + "grad_norm": 2.4072341918945312, + "learning_rate": 2.4952380952380955e-06, + "logits/chosen": -5.4685492515563965, + "logits/rejected": -5.51275634765625, + "logps/chosen": -345.37188720703125, + "logps/rejected": -273.87261962890625, + "loss": 0.6362700462341309, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.33122482895851135, + "rewards/margins": 0.1338074505329132, + "rewards/rejected": 0.19741736352443695, + "step": 90 + }, + { + "epoch": 0.8172043010752689, + "grad_norm": 1.745698094367981, + "learning_rate": 2.4476190476190475e-06, + "logits/chosen": -5.6481757164001465, + "logits/rejected": -5.4260029792785645, + "logps/chosen": -337.11767578125, + "logps/rejected": -263.3458251953125, + "loss": 0.6222721099853515, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 0.25297099351882935, + "rewards/margins": 0.1701255589723587, + "rewards/rejected": 0.08284540474414825, + "step": 95 + }, + { + "epoch": 0.8602150537634409, + "grad_norm": 1.4592756032943726, + "learning_rate": 2.4000000000000003e-06, + "logits/chosen": -5.402789115905762, + "logits/rejected": -5.280846118927002, + "logps/chosen": -304.55316162109375, + "logps/rejected": -314.95306396484375, + "loss": 0.6404934883117676, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.29103565216064453, + "rewards/margins": 0.12328717857599258, + "rewards/rejected": 0.16774848103523254, + "step": 100 + }, + { + "epoch": 0.9032258064516129, + "grad_norm": 7.651180744171143, + "learning_rate": 2.3523809523809523e-06, + "logits/chosen": -5.333284854888916, + "logits/rejected": -5.209356307983398, + "logps/chosen": -385.76483154296875, + "logps/rejected": -317.0154724121094, + "loss": 0.5562876224517822, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 0.5905097126960754, + "rewards/margins": 0.39645156264305115, + "rewards/rejected": 0.19405809044837952, + "step": 105 + }, + { + "epoch": 0.946236559139785, + "grad_norm": 2.2162137031555176, + "learning_rate": 2.304761904761905e-06, + "logits/chosen": -5.9164533615112305, + "logits/rejected": -5.660351276397705, + "logps/chosen": -362.57720947265625, + "logps/rejected": -328.386962890625, + "loss": 0.5672832489013672, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 0.4752024710178375, + "rewards/margins": 0.3182791471481323, + "rewards/rejected": 0.15692326426506042, + "step": 110 + }, + { + "epoch": 0.989247311827957, + "grad_norm": 2.102505683898926, + "learning_rate": 2.257142857142857e-06, + "logits/chosen": -5.367539882659912, + "logits/rejected": -5.304169654846191, + "logps/chosen": -389.9139404296875, + "logps/rejected": -303.767822265625, + "loss": 0.563088321685791, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 0.9107279777526855, + "rewards/margins": 0.7586480379104614, + "rewards/rejected": 0.15207989513874054, + "step": 115 + }, + { + "epoch": 1.0258064516129033, + "grad_norm": 2.9593698978424072, + "learning_rate": 2.20952380952381e-06, + "logits/chosen": -5.506618976593018, + "logits/rejected": -5.349832057952881, + "logps/chosen": -299.4593505859375, + "logps/rejected": -229.88784790039062, + "loss": 0.5665022850036621, + "rewards/accuracies": 0.8823529481887817, + "rewards/chosen": 0.4371771514415741, + "rewards/margins": 0.3876599073410034, + "rewards/rejected": 0.04951724037528038, + "step": 120 + }, + { + "epoch": 1.0688172043010753, + "grad_norm": 2.5988082885742188, + "learning_rate": 2.161904761904762e-06, + "logits/chosen": -5.542575836181641, + "logits/rejected": -5.428658485412598, + "logps/chosen": -353.8345642089844, + "logps/rejected": -312.9586486816406, + "loss": 0.5137276172637939, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 0.6325557827949524, + "rewards/margins": 0.44065460562705994, + "rewards/rejected": 0.19190113246440887, + "step": 125 + }, + { + "epoch": 1.1118279569892473, + "grad_norm": 1.6948280334472656, + "learning_rate": 2.1142857142857147e-06, + "logits/chosen": -5.847611427307129, + "logits/rejected": -5.498036861419678, + "logps/chosen": -300.35577392578125, + "logps/rejected": -311.05126953125, + "loss": 0.5773736953735351, + "rewards/accuracies": 0.875, + "rewards/chosen": 0.6476074457168579, + "rewards/margins": 0.37814000248908997, + "rewards/rejected": 0.2694675028324127, + "step": 130 + }, + { + "epoch": 1.1548387096774193, + "grad_norm": 1.372768759727478, + "learning_rate": 2.0666666666666666e-06, + "logits/chosen": -5.619741916656494, + "logits/rejected": -5.618372917175293, + "logps/chosen": -367.06732177734375, + "logps/rejected": -301.2478332519531, + "loss": 0.46464052200317385, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 0.8353813290596008, + "rewards/margins": 0.6073431968688965, + "rewards/rejected": 0.22803807258605957, + "step": 135 + }, + { + "epoch": 1.1978494623655913, + "grad_norm": 2.498854637145996, + "learning_rate": 2.0190476190476195e-06, + "logits/chosen": -5.195496559143066, + "logits/rejected": -4.919422149658203, + "logps/chosen": -333.09979248046875, + "logps/rejected": -317.78167724609375, + "loss": 0.5423533916473389, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 0.46827277541160583, + "rewards/margins": 0.42974653840065, + "rewards/rejected": 0.0385262668132782, + "step": 140 + }, + { + "epoch": 1.2408602150537635, + "grad_norm": 1.451978087425232, + "learning_rate": 1.9714285714285714e-06, + "logits/chosen": -5.425684452056885, + "logits/rejected": -5.322096824645996, + "logps/chosen": -314.4623107910156, + "logps/rejected": -271.85479736328125, + "loss": 0.4632129192352295, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 0.9266977310180664, + "rewards/margins": 0.672536313533783, + "rewards/rejected": 0.25416144728660583, + "step": 145 + }, + { + "epoch": 1.2838709677419355, + "grad_norm": 2.0581016540527344, + "learning_rate": 1.923809523809524e-06, + "logits/chosen": -4.949021339416504, + "logits/rejected": -5.033829212188721, + "logps/chosen": -356.3919982910156, + "logps/rejected": -330.82720947265625, + "loss": 0.4621857166290283, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 0.7969595789909363, + "rewards/margins": 0.6875919103622437, + "rewards/rejected": 0.10936765372753143, + "step": 150 + } + ], + "logging_steps": 5, + "max_steps": 351, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 50, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-9b-dpo/checkpoint-150/training_args.bin b/gemma-9b-dpo/checkpoint-150/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3dd4e898c665a87974fee402a2f65954466af4f7 --- /dev/null +++ b/gemma-9b-dpo/checkpoint-150/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74ee585106626f1196ffc9833586c5c11146fc384e5b33ed522cf45bab148032 +size 6097 diff --git a/gemma-9b-dpo/checkpoint-200/README.md b/gemma-9b-dpo/checkpoint-200/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a367db0a4fe66cceaf15eef8697f5b4480f48fd3 --- /dev/null +++ b/gemma-9b-dpo/checkpoint-200/README.md @@ -0,0 +1,209 @@ +--- +base_model: google/gemma-2-9b-it +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:google/gemma-2-9b-it +- dpo +- lora +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/gemma-9b-dpo/checkpoint-200/adapter_config.json b/gemma-9b-dpo/checkpoint-200/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c80c202b52eb25538c92a8e095abe785cdc6f749 --- /dev/null +++ b/gemma-9b-dpo/checkpoint-200/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2-9b-it", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj", + "v_proj", + "k_proj", + "up_proj", + "q_proj", + "gate_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-9b-dpo/checkpoint-200/adapter_model.safetensors b/gemma-9b-dpo/checkpoint-200/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6f1ba4be6cc15a44ce88d89b4140d5ec2d4942aa --- /dev/null +++ b/gemma-9b-dpo/checkpoint-200/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:148d36290eaf7e7ff0d36aa2e69413dd96652ab1ec0ba672aa91c7703cea564e +size 216151256 diff --git a/gemma-9b-dpo/checkpoint-200/chat_template.jinja b/gemma-9b-dpo/checkpoint-200/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..923ec253c8dbefbb41cf084db7251df41d000f6d --- /dev/null +++ b/gemma-9b-dpo/checkpoint-200/chat_template.jinja @@ -0,0 +1,4 @@ +{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + ' +' + message['content'] | trim + ' +' }}{% endfor %}{% if add_generation_prompt %}{{'model +'}}{% endif %} \ No newline at end of file diff --git a/gemma-9b-dpo/checkpoint-200/optimizer.pt b/gemma-9b-dpo/checkpoint-200/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c1829188ee24a7820cb75f4323594bbbbaa58af6 --- /dev/null +++ b/gemma-9b-dpo/checkpoint-200/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30ca54e740d552dff3c08af09de59aa65078627e4eaa324f024b288ed4f01d76 +size 110425877 diff --git a/gemma-9b-dpo/checkpoint-200/rng_state.pth b/gemma-9b-dpo/checkpoint-200/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..dbebd9e6d61cfcda2eea44484e4b07f4d7cba3fe --- /dev/null +++ b/gemma-9b-dpo/checkpoint-200/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de69a2834426ff9ef8199d077e00892579278af31d8969d77f98235b5cfc010a +size 14645 diff --git a/gemma-9b-dpo/checkpoint-200/scheduler.pt b/gemma-9b-dpo/checkpoint-200/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f421fc2b956411b23f61e887c93589a5a1a1b1cf --- /dev/null +++ b/gemma-9b-dpo/checkpoint-200/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5591e056f6f3a8b46f89ab0e15a297b8ef9e5d149ccdfe0eb7df998b20025b07 +size 1465 diff --git a/gemma-9b-dpo/checkpoint-200/tokenizer.json b/gemma-9b-dpo/checkpoint-200/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6523cc0616b64c563af913a417dfa7eb01549a2c --- /dev/null +++ b/gemma-9b-dpo/checkpoint-200/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:394ace002a144ac6ad5486387502f2d36f70c087310c3d907857240c76fcb36e +size 34362748 diff --git a/gemma-9b-dpo/checkpoint-200/tokenizer_config.json b/gemma-9b-dpo/checkpoint-200/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..eabc4ed8ab34751069a1970d61e616ce93e53880 --- /dev/null +++ b/gemma-9b-dpo/checkpoint-200/tokenizer_config.json @@ -0,0 +1,19 @@ +{ + "backend": "tokenizers", + "bos_token": "", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "extra_special_tokens": [ + "", + "" + ], + "is_local": false, + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-9b-dpo/checkpoint-200/trainer_state.json b/gemma-9b-dpo/checkpoint-200/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..7caa82acbd16b6dd6963263cbeeceac4e3853278 --- /dev/null +++ b/gemma-9b-dpo/checkpoint-200/trainer_state.json @@ -0,0 +1,634 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.7139784946236558, + "eval_steps": 500, + "global_step": 200, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.043010752688172046, + "grad_norm": 1.6562875509262085, + "learning_rate": 3.333333333333333e-07, + "logits/chosen": -5.94815731048584, + "logits/rejected": -5.856410503387451, + "logps/chosen": -302.07794189453125, + "logps/rejected": -283.5802001953125, + "loss": 0.6852486610412598, + "rewards/accuracies": 0.375, + "rewards/chosen": 0.01349079143255949, + "rewards/margins": 0.017757166177034378, + "rewards/rejected": -0.00426637614145875, + "step": 5 + }, + { + "epoch": 0.08602150537634409, + "grad_norm": 1.7723957300186157, + "learning_rate": 7.5e-07, + "logits/chosen": -5.585428714752197, + "logits/rejected": -5.577895641326904, + "logps/chosen": -362.89239501953125, + "logps/rejected": -280.8010559082031, + "loss": 0.6844330310821534, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.018745478242635727, + "rewards/margins": 0.018739622086286545, + "rewards/rejected": 5.85438692723983e-06, + "step": 10 + }, + { + "epoch": 0.12903225806451613, + "grad_norm": 1.6630758047103882, + "learning_rate": 1.1666666666666668e-06, + "logits/chosen": -5.916059494018555, + "logits/rejected": -5.797668933868408, + "logps/chosen": -307.4602966308594, + "logps/rejected": -316.79803466796875, + "loss": 0.7070020198822021, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.008706416003406048, + "rewards/margins": -0.02138950116932392, + "rewards/rejected": 0.012683087959885597, + "step": 15 + }, + { + "epoch": 0.17204301075268819, + "grad_norm": 5.225494861602783, + "learning_rate": 1.5833333333333333e-06, + "logits/chosen": -5.818185329437256, + "logits/rejected": -5.638014793395996, + "logps/chosen": -329.3303527832031, + "logps/rejected": -292.2645568847656, + "loss": 0.6929943561553955, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": -0.044999174773693085, + "rewards/margins": 0.00216915225610137, + "rewards/rejected": -0.04716832935810089, + "step": 20 + }, + { + "epoch": 0.21505376344086022, + "grad_norm": 2.5229244232177734, + "learning_rate": 2e-06, + "logits/chosen": -5.8189592361450195, + "logits/rejected": -5.646462917327881, + "logps/chosen": -311.9922180175781, + "logps/rejected": -297.0098571777344, + "loss": 0.7002316951751709, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": 0.0544702522456646, + "rewards/margins": -0.012132339179515839, + "rewards/rejected": 0.06660258769989014, + "step": 25 + }, + { + "epoch": 0.25806451612903225, + "grad_norm": 1.6524324417114258, + "learning_rate": 2.4166666666666667e-06, + "logits/chosen": -5.696784496307373, + "logits/rejected": -5.6943678855896, + "logps/chosen": -350.2938537597656, + "logps/rejected": -322.43182373046875, + "loss": 0.699475908279419, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.0020792910363525152, + "rewards/margins": 0.02348965033888817, + "rewards/rejected": -0.025568943470716476, + "step": 30 + }, + { + "epoch": 0.3010752688172043, + "grad_norm": 9.075493812561035, + "learning_rate": 2.8333333333333335e-06, + "logits/chosen": -5.407717704772949, + "logits/rejected": -5.5642805099487305, + "logps/chosen": -391.3837585449219, + "logps/rejected": -333.9232482910156, + "loss": 0.6926415920257568, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": -0.026233959943056107, + "rewards/margins": 0.00582819152623415, + "rewards/rejected": -0.03206215053796768, + "step": 35 + }, + { + "epoch": 0.34408602150537637, + "grad_norm": 2.6282100677490234, + "learning_rate": 2.9714285714285716e-06, + "logits/chosen": -5.649778366088867, + "logits/rejected": -5.512002944946289, + "logps/chosen": -338.31048583984375, + "logps/rejected": -279.06536865234375, + "loss": 0.6739111423492432, + "rewards/accuracies": 0.625, + "rewards/chosen": 0.04578382521867752, + "rewards/margins": 0.042618148028850555, + "rewards/rejected": 0.0031656839419156313, + "step": 40 + }, + { + "epoch": 0.3870967741935484, + "grad_norm": 16.96689796447754, + "learning_rate": 2.923809523809524e-06, + "logits/chosen": -5.518254280090332, + "logits/rejected": -5.577255725860596, + "logps/chosen": -374.29449462890625, + "logps/rejected": -366.4861145019531, + "loss": 0.7162120819091797, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": 0.03416462987661362, + "rewards/margins": 0.0036566159687936306, + "rewards/rejected": 0.030508000403642654, + "step": 45 + }, + { + "epoch": 0.43010752688172044, + "grad_norm": 1.447816252708435, + "learning_rate": 2.8761904761904764e-06, + "logits/chosen": -5.831571102142334, + "logits/rejected": -5.802765846252441, + "logps/chosen": -316.0965881347656, + "logps/rejected": -269.0494384765625, + "loss": 0.6838803291320801, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.1265859305858612, + "rewards/margins": 0.026753634214401245, + "rewards/rejected": -0.15333956480026245, + "step": 50 + }, + { + "epoch": 0.4731182795698925, + "grad_norm": 2.174459457397461, + "learning_rate": 2.8285714285714288e-06, + "logits/chosen": -5.777364253997803, + "logits/rejected": -5.834509372711182, + "logps/chosen": -316.31390380859375, + "logps/rejected": -282.5376892089844, + "loss": 0.6832016944885254, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.01756533980369568, + "rewards/margins": 0.02713041380047798, + "rewards/rejected": -0.04469575732946396, + "step": 55 + }, + { + "epoch": 0.5161290322580645, + "grad_norm": 2.2876009941101074, + "learning_rate": 2.780952380952381e-06, + "logits/chosen": -5.92340087890625, + "logits/rejected": -5.846226692199707, + "logps/chosen": -235.49667358398438, + "logps/rejected": -267.7764892578125, + "loss": 0.6656109809875488, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.021378064528107643, + "rewards/margins": 0.06001085042953491, + "rewards/rejected": -0.038632798939943314, + "step": 60 + }, + { + "epoch": 0.5591397849462365, + "grad_norm": 3.45564603805542, + "learning_rate": 2.733333333333333e-06, + "logits/chosen": -5.650258541107178, + "logits/rejected": -5.6721577644348145, + "logps/chosen": -381.40655517578125, + "logps/rejected": -311.68865966796875, + "loss": 0.7090614795684814, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": 0.1167660504579544, + "rewards/margins": -0.0002812861348502338, + "rewards/rejected": 0.11704733222723007, + "step": 65 + }, + { + "epoch": 0.6021505376344086, + "grad_norm": 4.502689838409424, + "learning_rate": 2.685714285714286e-06, + "logits/chosen": -5.727735996246338, + "logits/rejected": -5.644078254699707, + "logps/chosen": -362.89886474609375, + "logps/rejected": -314.7126770019531, + "loss": 0.6596640110015869, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.07285688072443008, + "rewards/margins": 0.07270809262990952, + "rewards/rejected": 0.00014879256195854396, + "step": 70 + }, + { + "epoch": 0.6451612903225806, + "grad_norm": 2.2681005001068115, + "learning_rate": 2.638095238095238e-06, + "logits/chosen": -5.528594017028809, + "logits/rejected": -5.29849910736084, + "logps/chosen": -353.0726013183594, + "logps/rejected": -331.8568115234375, + "loss": 0.7387234687805175, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": 0.1685911864042282, + "rewards/margins": -0.04706493765115738, + "rewards/rejected": 0.2156561315059662, + "step": 75 + }, + { + "epoch": 0.6881720430107527, + "grad_norm": 3.0135886669158936, + "learning_rate": 2.5904761904761907e-06, + "logits/chosen": -5.8677144050598145, + "logits/rejected": -5.5384016036987305, + "logps/chosen": -326.17724609375, + "logps/rejected": -285.8974304199219, + "loss": 0.6047093391418457, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 0.19864055514335632, + "rewards/margins": 0.19678126275539398, + "rewards/rejected": 0.0018593042623251677, + "step": 80 + }, + { + "epoch": 0.7311827956989247, + "grad_norm": 1.7684988975524902, + "learning_rate": 2.5428571428571427e-06, + "logits/chosen": -5.413943290710449, + "logits/rejected": -5.427316665649414, + "logps/chosen": -372.1170654296875, + "logps/rejected": -312.30279541015625, + "loss": 0.651799201965332, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": 0.179380863904953, + "rewards/margins": 0.09846550226211548, + "rewards/rejected": 0.08091535419225693, + "step": 85 + }, + { + "epoch": 0.7741935483870968, + "grad_norm": 2.4072341918945312, + "learning_rate": 2.4952380952380955e-06, + "logits/chosen": -5.4685492515563965, + "logits/rejected": -5.51275634765625, + "logps/chosen": -345.37188720703125, + "logps/rejected": -273.87261962890625, + "loss": 0.6362700462341309, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.33122482895851135, + "rewards/margins": 0.1338074505329132, + "rewards/rejected": 0.19741736352443695, + "step": 90 + }, + { + "epoch": 0.8172043010752689, + "grad_norm": 1.745698094367981, + "learning_rate": 2.4476190476190475e-06, + "logits/chosen": -5.6481757164001465, + "logits/rejected": -5.4260029792785645, + "logps/chosen": -337.11767578125, + "logps/rejected": -263.3458251953125, + "loss": 0.6222721099853515, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 0.25297099351882935, + "rewards/margins": 0.1701255589723587, + "rewards/rejected": 0.08284540474414825, + "step": 95 + }, + { + "epoch": 0.8602150537634409, + "grad_norm": 1.4592756032943726, + "learning_rate": 2.4000000000000003e-06, + "logits/chosen": -5.402789115905762, + "logits/rejected": -5.280846118927002, + "logps/chosen": -304.55316162109375, + "logps/rejected": -314.95306396484375, + "loss": 0.6404934883117676, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.29103565216064453, + "rewards/margins": 0.12328717857599258, + "rewards/rejected": 0.16774848103523254, + "step": 100 + }, + { + "epoch": 0.9032258064516129, + "grad_norm": 7.651180744171143, + "learning_rate": 2.3523809523809523e-06, + "logits/chosen": -5.333284854888916, + "logits/rejected": -5.209356307983398, + "logps/chosen": -385.76483154296875, + "logps/rejected": -317.0154724121094, + "loss": 0.5562876224517822, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 0.5905097126960754, + "rewards/margins": 0.39645156264305115, + "rewards/rejected": 0.19405809044837952, + "step": 105 + }, + { + "epoch": 0.946236559139785, + "grad_norm": 2.2162137031555176, + "learning_rate": 2.304761904761905e-06, + "logits/chosen": -5.9164533615112305, + "logits/rejected": -5.660351276397705, + "logps/chosen": -362.57720947265625, + "logps/rejected": -328.386962890625, + "loss": 0.5672832489013672, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 0.4752024710178375, + "rewards/margins": 0.3182791471481323, + "rewards/rejected": 0.15692326426506042, + "step": 110 + }, + { + "epoch": 0.989247311827957, + "grad_norm": 2.102505683898926, + "learning_rate": 2.257142857142857e-06, + "logits/chosen": -5.367539882659912, + "logits/rejected": -5.304169654846191, + "logps/chosen": -389.9139404296875, + "logps/rejected": -303.767822265625, + "loss": 0.563088321685791, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 0.9107279777526855, + "rewards/margins": 0.7586480379104614, + "rewards/rejected": 0.15207989513874054, + "step": 115 + }, + { + "epoch": 1.0258064516129033, + "grad_norm": 2.9593698978424072, + "learning_rate": 2.20952380952381e-06, + "logits/chosen": -5.506618976593018, + "logits/rejected": -5.349832057952881, + "logps/chosen": -299.4593505859375, + "logps/rejected": -229.88784790039062, + "loss": 0.5665022850036621, + "rewards/accuracies": 0.8823529481887817, + "rewards/chosen": 0.4371771514415741, + "rewards/margins": 0.3876599073410034, + "rewards/rejected": 0.04951724037528038, + "step": 120 + }, + { + "epoch": 1.0688172043010753, + "grad_norm": 2.5988082885742188, + "learning_rate": 2.161904761904762e-06, + "logits/chosen": -5.542575836181641, + "logits/rejected": -5.428658485412598, + "logps/chosen": -353.8345642089844, + "logps/rejected": -312.9586486816406, + "loss": 0.5137276172637939, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 0.6325557827949524, + "rewards/margins": 0.44065460562705994, + "rewards/rejected": 0.19190113246440887, + "step": 125 + }, + { + "epoch": 1.1118279569892473, + "grad_norm": 1.6948280334472656, + "learning_rate": 2.1142857142857147e-06, + "logits/chosen": -5.847611427307129, + "logits/rejected": -5.498036861419678, + "logps/chosen": -300.35577392578125, + "logps/rejected": -311.05126953125, + "loss": 0.5773736953735351, + "rewards/accuracies": 0.875, + "rewards/chosen": 0.6476074457168579, + "rewards/margins": 0.37814000248908997, + "rewards/rejected": 0.2694675028324127, + "step": 130 + }, + { + "epoch": 1.1548387096774193, + "grad_norm": 1.372768759727478, + "learning_rate": 2.0666666666666666e-06, + "logits/chosen": -5.619741916656494, + "logits/rejected": -5.618372917175293, + "logps/chosen": -367.06732177734375, + "logps/rejected": -301.2478332519531, + "loss": 0.46464052200317385, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 0.8353813290596008, + "rewards/margins": 0.6073431968688965, + "rewards/rejected": 0.22803807258605957, + "step": 135 + }, + { + "epoch": 1.1978494623655913, + "grad_norm": 2.498854637145996, + "learning_rate": 2.0190476190476195e-06, + "logits/chosen": -5.195496559143066, + "logits/rejected": -4.919422149658203, + "logps/chosen": -333.09979248046875, + "logps/rejected": -317.78167724609375, + "loss": 0.5423533916473389, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 0.46827277541160583, + "rewards/margins": 0.42974653840065, + "rewards/rejected": 0.0385262668132782, + "step": 140 + }, + { + "epoch": 1.2408602150537635, + "grad_norm": 1.451978087425232, + "learning_rate": 1.9714285714285714e-06, + "logits/chosen": -5.425684452056885, + "logits/rejected": -5.322096824645996, + "logps/chosen": -314.4623107910156, + "logps/rejected": -271.85479736328125, + "loss": 0.4632129192352295, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 0.9266977310180664, + "rewards/margins": 0.672536313533783, + "rewards/rejected": 0.25416144728660583, + "step": 145 + }, + { + "epoch": 1.2838709677419355, + "grad_norm": 2.0581016540527344, + "learning_rate": 1.923809523809524e-06, + "logits/chosen": -4.949021339416504, + "logits/rejected": -5.033829212188721, + "logps/chosen": -356.3919982910156, + "logps/rejected": -330.82720947265625, + "loss": 0.4621857166290283, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 0.7969595789909363, + "rewards/margins": 0.6875919103622437, + "rewards/rejected": 0.10936765372753143, + "step": 150 + }, + { + "epoch": 1.3268817204301075, + "grad_norm": 1.6490590572357178, + "learning_rate": 1.8761904761904762e-06, + "logits/chosen": -5.267385005950928, + "logits/rejected": -5.265533924102783, + "logps/chosen": -297.99224853515625, + "logps/rejected": -326.91339111328125, + "loss": 0.49877166748046875, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 0.7068697810173035, + "rewards/margins": 0.6336223483085632, + "rewards/rejected": 0.07324743270874023, + "step": 155 + }, + { + "epoch": 1.3698924731182796, + "grad_norm": 1.483849048614502, + "learning_rate": 1.8285714285714288e-06, + "logits/chosen": -5.356790065765381, + "logits/rejected": -5.103802680969238, + "logps/chosen": -294.2845153808594, + "logps/rejected": -274.7852783203125, + "loss": 0.49851350784301757, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 0.6102481484413147, + "rewards/margins": 0.5510420203208923, + "rewards/rejected": 0.05920610576868057, + "step": 160 + }, + { + "epoch": 1.4129032258064516, + "grad_norm": 1.3165900707244873, + "learning_rate": 1.780952380952381e-06, + "logits/chosen": -5.431517124176025, + "logits/rejected": -5.376145362854004, + "logps/chosen": -325.1388244628906, + "logps/rejected": -327.6669616699219, + "loss": 0.4309373378753662, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 0.6497762799263, + "rewards/margins": 0.9170367121696472, + "rewards/rejected": -0.2672604024410248, + "step": 165 + }, + { + "epoch": 1.4559139784946238, + "grad_norm": 1.5377726554870605, + "learning_rate": 1.7333333333333332e-06, + "logits/chosen": -5.27555513381958, + "logits/rejected": -5.022242069244385, + "logps/chosen": -340.17779541015625, + "logps/rejected": -288.0126647949219, + "loss": 0.4575087547302246, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 0.5721868276596069, + "rewards/margins": 0.6390407085418701, + "rewards/rejected": -0.06685388088226318, + "step": 170 + }, + { + "epoch": 1.4989247311827958, + "grad_norm": 2.0849504470825195, + "learning_rate": 1.6857142857142858e-06, + "logits/chosen": -5.070017337799072, + "logits/rejected": -5.327781677246094, + "logps/chosen": -360.60809326171875, + "logps/rejected": -292.8214416503906, + "loss": 0.45406789779663087, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 0.748975396156311, + "rewards/margins": 0.6336467266082764, + "rewards/rejected": 0.11532865464687347, + "step": 175 + }, + { + "epoch": 1.5419354838709678, + "grad_norm": 1.4946179389953613, + "learning_rate": 1.638095238095238e-06, + "logits/chosen": -5.378829002380371, + "logits/rejected": -5.2273030281066895, + "logps/chosen": -369.40679931640625, + "logps/rejected": -310.7534484863281, + "loss": 0.40073528289794924, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9242110252380371, + "rewards/margins": 0.8946256637573242, + "rewards/rejected": 0.029585417360067368, + "step": 180 + }, + { + "epoch": 1.5849462365591398, + "grad_norm": 1.9285597801208496, + "learning_rate": 1.5904761904761906e-06, + "logits/chosen": -5.534226417541504, + "logits/rejected": -5.36181640625, + "logps/chosen": -285.0791931152344, + "logps/rejected": -258.01654052734375, + "loss": 0.44419097900390625, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 0.7649332284927368, + "rewards/margins": 0.6902278661727905, + "rewards/rejected": 0.07470535486936569, + "step": 185 + }, + { + "epoch": 1.6279569892473118, + "grad_norm": 1.4278947114944458, + "learning_rate": 1.5428571428571428e-06, + "logits/chosen": -5.1719889640808105, + "logits/rejected": -5.186745643615723, + "logps/chosen": -366.37286376953125, + "logps/rejected": -310.18048095703125, + "loss": 0.37736806869506834, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4671168327331543, + "rewards/margins": 1.110413908958435, + "rewards/rejected": 0.3567030429840088, + "step": 190 + }, + { + "epoch": 1.6709677419354838, + "grad_norm": 1.7930585145950317, + "learning_rate": 1.4952380952380954e-06, + "logits/chosen": -5.458104133605957, + "logits/rejected": -5.514155864715576, + "logps/chosen": -288.04083251953125, + "logps/rejected": -311.49090576171875, + "loss": 0.43456592559814455, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 0.7255552411079407, + "rewards/margins": 0.843337893486023, + "rewards/rejected": -0.11778266727924347, + "step": 195 + }, + { + "epoch": 1.7139784946236558, + "grad_norm": 1.3665006160736084, + "learning_rate": 1.4476190476190478e-06, + "logits/chosen": -5.221610069274902, + "logits/rejected": -5.051304340362549, + "logps/chosen": -379.436279296875, + "logps/rejected": -294.8427734375, + "loss": 0.3780463218688965, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 1.0199247598648071, + "rewards/margins": 1.0175530910491943, + "rewards/rejected": 0.0023716867435723543, + "step": 200 + } + ], + "logging_steps": 5, + "max_steps": 351, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 50, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-9b-dpo/checkpoint-200/training_args.bin b/gemma-9b-dpo/checkpoint-200/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3dd4e898c665a87974fee402a2f65954466af4f7 --- /dev/null +++ b/gemma-9b-dpo/checkpoint-200/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74ee585106626f1196ffc9833586c5c11146fc384e5b33ed522cf45bab148032 +size 6097 diff --git a/gemma-9b-dpo/checkpoint-250/README.md b/gemma-9b-dpo/checkpoint-250/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a367db0a4fe66cceaf15eef8697f5b4480f48fd3 --- /dev/null +++ b/gemma-9b-dpo/checkpoint-250/README.md @@ -0,0 +1,209 @@ +--- +base_model: google/gemma-2-9b-it +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:google/gemma-2-9b-it +- dpo +- lora +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/gemma-9b-dpo/checkpoint-250/adapter_config.json b/gemma-9b-dpo/checkpoint-250/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c80c202b52eb25538c92a8e095abe785cdc6f749 --- /dev/null +++ b/gemma-9b-dpo/checkpoint-250/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2-9b-it", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj", + "v_proj", + "k_proj", + "up_proj", + "q_proj", + "gate_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-9b-dpo/checkpoint-250/adapter_model.safetensors b/gemma-9b-dpo/checkpoint-250/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7bbdb70e0242b88d6a97d70d120cd00627a6497a --- /dev/null +++ b/gemma-9b-dpo/checkpoint-250/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:704ff1ccb6a1f6beee1d50c669cc8f81d0fbcbb03b1dd612f4e299ec66230ff9 +size 216151256 diff --git a/gemma-9b-dpo/checkpoint-250/chat_template.jinja b/gemma-9b-dpo/checkpoint-250/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..923ec253c8dbefbb41cf084db7251df41d000f6d --- /dev/null +++ b/gemma-9b-dpo/checkpoint-250/chat_template.jinja @@ -0,0 +1,4 @@ +{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + ' +' + message['content'] | trim + ' +' }}{% endfor %}{% if add_generation_prompt %}{{'model +'}}{% endif %} \ No newline at end of file diff --git a/gemma-9b-dpo/checkpoint-250/optimizer.pt b/gemma-9b-dpo/checkpoint-250/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..a4e8412c1d8846e68e14e238f72cc4d211bde39d --- /dev/null +++ b/gemma-9b-dpo/checkpoint-250/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3573a3bb365b29c41a0cf112e05c12b594cdec400b68371a2726c2d28574913e +size 110425877 diff --git a/gemma-9b-dpo/checkpoint-250/rng_state.pth b/gemma-9b-dpo/checkpoint-250/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..065f385e762194a148dec7fed295c58a3e7c17fa --- /dev/null +++ b/gemma-9b-dpo/checkpoint-250/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2534e434cd5abbb8f7668d3eab0549db0ef95d6a797a3efa86b712e8e32266a7 +size 14645 diff --git a/gemma-9b-dpo/checkpoint-250/scheduler.pt b/gemma-9b-dpo/checkpoint-250/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..932b590fcbfb40d7e5f23713d0d5bb3a1a645794 --- /dev/null +++ b/gemma-9b-dpo/checkpoint-250/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f12b49b9980a9d56fac92a1aed3dfea4f671a19e8cfb1c75b50d9e52b6848d73 +size 1465 diff --git a/gemma-9b-dpo/checkpoint-250/tokenizer.json b/gemma-9b-dpo/checkpoint-250/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6523cc0616b64c563af913a417dfa7eb01549a2c --- /dev/null +++ b/gemma-9b-dpo/checkpoint-250/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:394ace002a144ac6ad5486387502f2d36f70c087310c3d907857240c76fcb36e +size 34362748 diff --git a/gemma-9b-dpo/checkpoint-250/tokenizer_config.json b/gemma-9b-dpo/checkpoint-250/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..eabc4ed8ab34751069a1970d61e616ce93e53880 --- /dev/null +++ b/gemma-9b-dpo/checkpoint-250/tokenizer_config.json @@ -0,0 +1,19 @@ +{ + "backend": "tokenizers", + "bos_token": "", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "extra_special_tokens": [ + "", + "" + ], + "is_local": false, + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-9b-dpo/checkpoint-250/trainer_state.json b/gemma-9b-dpo/checkpoint-250/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9df62152923d15082efa90afd9dee9be309206c7 --- /dev/null +++ b/gemma-9b-dpo/checkpoint-250/trainer_state.json @@ -0,0 +1,784 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.1376344086021506, + "eval_steps": 500, + "global_step": 250, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.043010752688172046, + "grad_norm": 1.6562875509262085, + "learning_rate": 3.333333333333333e-07, + "logits/chosen": -5.94815731048584, + "logits/rejected": -5.856410503387451, + "logps/chosen": -302.07794189453125, + "logps/rejected": -283.5802001953125, + "loss": 0.6852486610412598, + "rewards/accuracies": 0.375, + "rewards/chosen": 0.01349079143255949, + "rewards/margins": 0.017757166177034378, + "rewards/rejected": -0.00426637614145875, + "step": 5 + }, + { + "epoch": 0.08602150537634409, + "grad_norm": 1.7723957300186157, + "learning_rate": 7.5e-07, + "logits/chosen": -5.585428714752197, + "logits/rejected": -5.577895641326904, + "logps/chosen": -362.89239501953125, + "logps/rejected": -280.8010559082031, + "loss": 0.6844330310821534, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.018745478242635727, + "rewards/margins": 0.018739622086286545, + "rewards/rejected": 5.85438692723983e-06, + "step": 10 + }, + { + "epoch": 0.12903225806451613, + "grad_norm": 1.6630758047103882, + "learning_rate": 1.1666666666666668e-06, + "logits/chosen": -5.916059494018555, + "logits/rejected": -5.797668933868408, + "logps/chosen": -307.4602966308594, + "logps/rejected": -316.79803466796875, + "loss": 0.7070020198822021, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.008706416003406048, + "rewards/margins": -0.02138950116932392, + "rewards/rejected": 0.012683087959885597, + "step": 15 + }, + { + "epoch": 0.17204301075268819, + "grad_norm": 5.225494861602783, + "learning_rate": 1.5833333333333333e-06, + "logits/chosen": -5.818185329437256, + "logits/rejected": -5.638014793395996, + "logps/chosen": -329.3303527832031, + "logps/rejected": -292.2645568847656, + "loss": 0.6929943561553955, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": -0.044999174773693085, + "rewards/margins": 0.00216915225610137, + "rewards/rejected": -0.04716832935810089, + "step": 20 + }, + { + "epoch": 0.21505376344086022, + "grad_norm": 2.5229244232177734, + "learning_rate": 2e-06, + "logits/chosen": -5.8189592361450195, + "logits/rejected": -5.646462917327881, + "logps/chosen": -311.9922180175781, + "logps/rejected": -297.0098571777344, + "loss": 0.7002316951751709, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": 0.0544702522456646, + "rewards/margins": -0.012132339179515839, + "rewards/rejected": 0.06660258769989014, + "step": 25 + }, + { + "epoch": 0.25806451612903225, + "grad_norm": 1.6524324417114258, + "learning_rate": 2.4166666666666667e-06, + "logits/chosen": -5.696784496307373, + "logits/rejected": -5.6943678855896, + "logps/chosen": -350.2938537597656, + "logps/rejected": -322.43182373046875, + "loss": 0.699475908279419, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.0020792910363525152, + "rewards/margins": 0.02348965033888817, + "rewards/rejected": -0.025568943470716476, + "step": 30 + }, + { + "epoch": 0.3010752688172043, + "grad_norm": 9.075493812561035, + "learning_rate": 2.8333333333333335e-06, + "logits/chosen": -5.407717704772949, + "logits/rejected": -5.5642805099487305, + "logps/chosen": -391.3837585449219, + "logps/rejected": -333.9232482910156, + "loss": 0.6926415920257568, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": -0.026233959943056107, + "rewards/margins": 0.00582819152623415, + "rewards/rejected": -0.03206215053796768, + "step": 35 + }, + { + "epoch": 0.34408602150537637, + "grad_norm": 2.6282100677490234, + "learning_rate": 2.9714285714285716e-06, + "logits/chosen": -5.649778366088867, + "logits/rejected": -5.512002944946289, + "logps/chosen": -338.31048583984375, + "logps/rejected": -279.06536865234375, + "loss": 0.6739111423492432, + "rewards/accuracies": 0.625, + "rewards/chosen": 0.04578382521867752, + "rewards/margins": 0.042618148028850555, + "rewards/rejected": 0.0031656839419156313, + "step": 40 + }, + { + "epoch": 0.3870967741935484, + "grad_norm": 16.96689796447754, + "learning_rate": 2.923809523809524e-06, + "logits/chosen": -5.518254280090332, + "logits/rejected": -5.577255725860596, + "logps/chosen": -374.29449462890625, + "logps/rejected": -366.4861145019531, + "loss": 0.7162120819091797, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": 0.03416462987661362, + "rewards/margins": 0.0036566159687936306, + "rewards/rejected": 0.030508000403642654, + "step": 45 + }, + { + "epoch": 0.43010752688172044, + "grad_norm": 1.447816252708435, + "learning_rate": 2.8761904761904764e-06, + "logits/chosen": -5.831571102142334, + "logits/rejected": -5.802765846252441, + "logps/chosen": -316.0965881347656, + "logps/rejected": -269.0494384765625, + "loss": 0.6838803291320801, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.1265859305858612, + "rewards/margins": 0.026753634214401245, + "rewards/rejected": -0.15333956480026245, + "step": 50 + }, + { + "epoch": 0.4731182795698925, + "grad_norm": 2.174459457397461, + "learning_rate": 2.8285714285714288e-06, + "logits/chosen": -5.777364253997803, + "logits/rejected": -5.834509372711182, + "logps/chosen": -316.31390380859375, + "logps/rejected": -282.5376892089844, + "loss": 0.6832016944885254, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.01756533980369568, + "rewards/margins": 0.02713041380047798, + "rewards/rejected": -0.04469575732946396, + "step": 55 + }, + { + "epoch": 0.5161290322580645, + "grad_norm": 2.2876009941101074, + "learning_rate": 2.780952380952381e-06, + "logits/chosen": -5.92340087890625, + "logits/rejected": -5.846226692199707, + "logps/chosen": -235.49667358398438, + "logps/rejected": -267.7764892578125, + "loss": 0.6656109809875488, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.021378064528107643, + "rewards/margins": 0.06001085042953491, + "rewards/rejected": -0.038632798939943314, + "step": 60 + }, + { + "epoch": 0.5591397849462365, + "grad_norm": 3.45564603805542, + "learning_rate": 2.733333333333333e-06, + "logits/chosen": -5.650258541107178, + "logits/rejected": -5.6721577644348145, + "logps/chosen": -381.40655517578125, + "logps/rejected": -311.68865966796875, + "loss": 0.7090614795684814, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": 0.1167660504579544, + "rewards/margins": -0.0002812861348502338, + "rewards/rejected": 0.11704733222723007, + "step": 65 + }, + { + "epoch": 0.6021505376344086, + "grad_norm": 4.502689838409424, + "learning_rate": 2.685714285714286e-06, + "logits/chosen": -5.727735996246338, + "logits/rejected": -5.644078254699707, + "logps/chosen": -362.89886474609375, + "logps/rejected": -314.7126770019531, + "loss": 0.6596640110015869, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.07285688072443008, + "rewards/margins": 0.07270809262990952, + "rewards/rejected": 0.00014879256195854396, + "step": 70 + }, + { + "epoch": 0.6451612903225806, + "grad_norm": 2.2681005001068115, + "learning_rate": 2.638095238095238e-06, + "logits/chosen": -5.528594017028809, + "logits/rejected": -5.29849910736084, + "logps/chosen": -353.0726013183594, + "logps/rejected": -331.8568115234375, + "loss": 0.7387234687805175, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": 0.1685911864042282, + "rewards/margins": -0.04706493765115738, + "rewards/rejected": 0.2156561315059662, + "step": 75 + }, + { + "epoch": 0.6881720430107527, + "grad_norm": 3.0135886669158936, + "learning_rate": 2.5904761904761907e-06, + "logits/chosen": -5.8677144050598145, + "logits/rejected": -5.5384016036987305, + "logps/chosen": -326.17724609375, + "logps/rejected": -285.8974304199219, + "loss": 0.6047093391418457, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 0.19864055514335632, + "rewards/margins": 0.19678126275539398, + "rewards/rejected": 0.0018593042623251677, + "step": 80 + }, + { + "epoch": 0.7311827956989247, + "grad_norm": 1.7684988975524902, + "learning_rate": 2.5428571428571427e-06, + "logits/chosen": -5.413943290710449, + "logits/rejected": -5.427316665649414, + "logps/chosen": -372.1170654296875, + "logps/rejected": -312.30279541015625, + "loss": 0.651799201965332, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": 0.179380863904953, + "rewards/margins": 0.09846550226211548, + "rewards/rejected": 0.08091535419225693, + "step": 85 + }, + { + "epoch": 0.7741935483870968, + "grad_norm": 2.4072341918945312, + "learning_rate": 2.4952380952380955e-06, + "logits/chosen": -5.4685492515563965, + "logits/rejected": -5.51275634765625, + "logps/chosen": -345.37188720703125, + "logps/rejected": -273.87261962890625, + "loss": 0.6362700462341309, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.33122482895851135, + "rewards/margins": 0.1338074505329132, + "rewards/rejected": 0.19741736352443695, + "step": 90 + }, + { + "epoch": 0.8172043010752689, + "grad_norm": 1.745698094367981, + "learning_rate": 2.4476190476190475e-06, + "logits/chosen": -5.6481757164001465, + "logits/rejected": -5.4260029792785645, + "logps/chosen": -337.11767578125, + "logps/rejected": -263.3458251953125, + "loss": 0.6222721099853515, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 0.25297099351882935, + "rewards/margins": 0.1701255589723587, + "rewards/rejected": 0.08284540474414825, + "step": 95 + }, + { + "epoch": 0.8602150537634409, + "grad_norm": 1.4592756032943726, + "learning_rate": 2.4000000000000003e-06, + "logits/chosen": -5.402789115905762, + "logits/rejected": -5.280846118927002, + "logps/chosen": -304.55316162109375, + "logps/rejected": -314.95306396484375, + "loss": 0.6404934883117676, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.29103565216064453, + "rewards/margins": 0.12328717857599258, + "rewards/rejected": 0.16774848103523254, + "step": 100 + }, + { + "epoch": 0.9032258064516129, + "grad_norm": 7.651180744171143, + "learning_rate": 2.3523809523809523e-06, + "logits/chosen": -5.333284854888916, + "logits/rejected": -5.209356307983398, + "logps/chosen": -385.76483154296875, + "logps/rejected": -317.0154724121094, + "loss": 0.5562876224517822, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 0.5905097126960754, + "rewards/margins": 0.39645156264305115, + "rewards/rejected": 0.19405809044837952, + "step": 105 + }, + { + "epoch": 0.946236559139785, + "grad_norm": 2.2162137031555176, + "learning_rate": 2.304761904761905e-06, + "logits/chosen": -5.9164533615112305, + "logits/rejected": -5.660351276397705, + "logps/chosen": -362.57720947265625, + "logps/rejected": -328.386962890625, + "loss": 0.5672832489013672, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 0.4752024710178375, + "rewards/margins": 0.3182791471481323, + "rewards/rejected": 0.15692326426506042, + "step": 110 + }, + { + "epoch": 0.989247311827957, + "grad_norm": 2.102505683898926, + "learning_rate": 2.257142857142857e-06, + "logits/chosen": -5.367539882659912, + "logits/rejected": -5.304169654846191, + "logps/chosen": -389.9139404296875, + "logps/rejected": -303.767822265625, + "loss": 0.563088321685791, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 0.9107279777526855, + "rewards/margins": 0.7586480379104614, + "rewards/rejected": 0.15207989513874054, + "step": 115 + }, + { + "epoch": 1.0258064516129033, + "grad_norm": 2.9593698978424072, + "learning_rate": 2.20952380952381e-06, + "logits/chosen": -5.506618976593018, + "logits/rejected": -5.349832057952881, + "logps/chosen": -299.4593505859375, + "logps/rejected": -229.88784790039062, + "loss": 0.5665022850036621, + "rewards/accuracies": 0.8823529481887817, + "rewards/chosen": 0.4371771514415741, + "rewards/margins": 0.3876599073410034, + "rewards/rejected": 0.04951724037528038, + "step": 120 + }, + { + "epoch": 1.0688172043010753, + "grad_norm": 2.5988082885742188, + "learning_rate": 2.161904761904762e-06, + "logits/chosen": -5.542575836181641, + "logits/rejected": -5.428658485412598, + "logps/chosen": -353.8345642089844, + "logps/rejected": -312.9586486816406, + "loss": 0.5137276172637939, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 0.6325557827949524, + "rewards/margins": 0.44065460562705994, + "rewards/rejected": 0.19190113246440887, + "step": 125 + }, + { + "epoch": 1.1118279569892473, + "grad_norm": 1.6948280334472656, + "learning_rate": 2.1142857142857147e-06, + "logits/chosen": -5.847611427307129, + "logits/rejected": -5.498036861419678, + "logps/chosen": -300.35577392578125, + "logps/rejected": -311.05126953125, + "loss": 0.5773736953735351, + "rewards/accuracies": 0.875, + "rewards/chosen": 0.6476074457168579, + "rewards/margins": 0.37814000248908997, + "rewards/rejected": 0.2694675028324127, + "step": 130 + }, + { + "epoch": 1.1548387096774193, + "grad_norm": 1.372768759727478, + "learning_rate": 2.0666666666666666e-06, + "logits/chosen": -5.619741916656494, + "logits/rejected": -5.618372917175293, + "logps/chosen": -367.06732177734375, + "logps/rejected": -301.2478332519531, + "loss": 0.46464052200317385, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 0.8353813290596008, + "rewards/margins": 0.6073431968688965, + "rewards/rejected": 0.22803807258605957, + "step": 135 + }, + { + "epoch": 1.1978494623655913, + "grad_norm": 2.498854637145996, + "learning_rate": 2.0190476190476195e-06, + "logits/chosen": -5.195496559143066, + "logits/rejected": -4.919422149658203, + "logps/chosen": -333.09979248046875, + "logps/rejected": -317.78167724609375, + "loss": 0.5423533916473389, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 0.46827277541160583, + "rewards/margins": 0.42974653840065, + "rewards/rejected": 0.0385262668132782, + "step": 140 + }, + { + "epoch": 1.2408602150537635, + "grad_norm": 1.451978087425232, + "learning_rate": 1.9714285714285714e-06, + "logits/chosen": -5.425684452056885, + "logits/rejected": -5.322096824645996, + "logps/chosen": -314.4623107910156, + "logps/rejected": -271.85479736328125, + "loss": 0.4632129192352295, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 0.9266977310180664, + "rewards/margins": 0.672536313533783, + "rewards/rejected": 0.25416144728660583, + "step": 145 + }, + { + "epoch": 1.2838709677419355, + "grad_norm": 2.0581016540527344, + "learning_rate": 1.923809523809524e-06, + "logits/chosen": -4.949021339416504, + "logits/rejected": -5.033829212188721, + "logps/chosen": -356.3919982910156, + "logps/rejected": -330.82720947265625, + "loss": 0.4621857166290283, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 0.7969595789909363, + "rewards/margins": 0.6875919103622437, + "rewards/rejected": 0.10936765372753143, + "step": 150 + }, + { + "epoch": 1.3268817204301075, + "grad_norm": 1.6490590572357178, + "learning_rate": 1.8761904761904762e-06, + "logits/chosen": -5.267385005950928, + "logits/rejected": -5.265533924102783, + "logps/chosen": -297.99224853515625, + "logps/rejected": -326.91339111328125, + "loss": 0.49877166748046875, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 0.7068697810173035, + "rewards/margins": 0.6336223483085632, + "rewards/rejected": 0.07324743270874023, + "step": 155 + }, + { + "epoch": 1.3698924731182796, + "grad_norm": 1.483849048614502, + "learning_rate": 1.8285714285714288e-06, + "logits/chosen": -5.356790065765381, + "logits/rejected": -5.103802680969238, + "logps/chosen": -294.2845153808594, + "logps/rejected": -274.7852783203125, + "loss": 0.49851350784301757, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 0.6102481484413147, + "rewards/margins": 0.5510420203208923, + "rewards/rejected": 0.05920610576868057, + "step": 160 + }, + { + "epoch": 1.4129032258064516, + "grad_norm": 1.3165900707244873, + "learning_rate": 1.780952380952381e-06, + "logits/chosen": -5.431517124176025, + "logits/rejected": -5.376145362854004, + "logps/chosen": -325.1388244628906, + "logps/rejected": -327.6669616699219, + "loss": 0.4309373378753662, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 0.6497762799263, + "rewards/margins": 0.9170367121696472, + "rewards/rejected": -0.2672604024410248, + "step": 165 + }, + { + "epoch": 1.4559139784946238, + "grad_norm": 1.5377726554870605, + "learning_rate": 1.7333333333333332e-06, + "logits/chosen": -5.27555513381958, + "logits/rejected": -5.022242069244385, + "logps/chosen": -340.17779541015625, + "logps/rejected": -288.0126647949219, + "loss": 0.4575087547302246, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 0.5721868276596069, + "rewards/margins": 0.6390407085418701, + "rewards/rejected": -0.06685388088226318, + "step": 170 + }, + { + "epoch": 1.4989247311827958, + "grad_norm": 2.0849504470825195, + "learning_rate": 1.6857142857142858e-06, + "logits/chosen": -5.070017337799072, + "logits/rejected": -5.327781677246094, + "logps/chosen": -360.60809326171875, + "logps/rejected": -292.8214416503906, + "loss": 0.45406789779663087, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 0.748975396156311, + "rewards/margins": 0.6336467266082764, + "rewards/rejected": 0.11532865464687347, + "step": 175 + }, + { + "epoch": 1.5419354838709678, + "grad_norm": 1.4946179389953613, + "learning_rate": 1.638095238095238e-06, + "logits/chosen": -5.378829002380371, + "logits/rejected": -5.2273030281066895, + "logps/chosen": -369.40679931640625, + "logps/rejected": -310.7534484863281, + "loss": 0.40073528289794924, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9242110252380371, + "rewards/margins": 0.8946256637573242, + "rewards/rejected": 0.029585417360067368, + "step": 180 + }, + { + "epoch": 1.5849462365591398, + "grad_norm": 1.9285597801208496, + "learning_rate": 1.5904761904761906e-06, + "logits/chosen": -5.534226417541504, + "logits/rejected": -5.36181640625, + "logps/chosen": -285.0791931152344, + "logps/rejected": -258.01654052734375, + "loss": 0.44419097900390625, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 0.7649332284927368, + "rewards/margins": 0.6902278661727905, + "rewards/rejected": 0.07470535486936569, + "step": 185 + }, + { + "epoch": 1.6279569892473118, + "grad_norm": 1.4278947114944458, + "learning_rate": 1.5428571428571428e-06, + "logits/chosen": -5.1719889640808105, + "logits/rejected": -5.186745643615723, + "logps/chosen": -366.37286376953125, + "logps/rejected": -310.18048095703125, + "loss": 0.37736806869506834, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4671168327331543, + "rewards/margins": 1.110413908958435, + "rewards/rejected": 0.3567030429840088, + "step": 190 + }, + { + "epoch": 1.6709677419354838, + "grad_norm": 1.7930585145950317, + "learning_rate": 1.4952380952380954e-06, + "logits/chosen": -5.458104133605957, + "logits/rejected": -5.514155864715576, + "logps/chosen": -288.04083251953125, + "logps/rejected": -311.49090576171875, + "loss": 0.43456592559814455, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 0.7255552411079407, + "rewards/margins": 0.843337893486023, + "rewards/rejected": -0.11778266727924347, + "step": 195 + }, + { + "epoch": 1.7139784946236558, + "grad_norm": 1.3665006160736084, + "learning_rate": 1.4476190476190478e-06, + "logits/chosen": -5.221610069274902, + "logits/rejected": -5.051304340362549, + "logps/chosen": -379.436279296875, + "logps/rejected": -294.8427734375, + "loss": 0.3780463218688965, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 1.0199247598648071, + "rewards/margins": 1.0175530910491943, + "rewards/rejected": 0.0023716867435723543, + "step": 200 + }, + { + "epoch": 1.7569892473118278, + "grad_norm": 1.2653045654296875, + "learning_rate": 1.4000000000000001e-06, + "logits/chosen": -5.128909111022949, + "logits/rejected": -4.9968366622924805, + "logps/chosen": -374.5747375488281, + "logps/rejected": -310.3750915527344, + "loss": 0.42014646530151367, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 1.18593430519104, + "rewards/margins": 1.1055948734283447, + "rewards/rejected": 0.08033928275108337, + "step": 205 + }, + { + "epoch": 1.8, + "grad_norm": 1.0835349559783936, + "learning_rate": 1.3523809523809525e-06, + "logits/chosen": -5.398374557495117, + "logits/rejected": -5.446703910827637, + "logps/chosen": -332.9021911621094, + "logps/rejected": -307.1310119628906, + "loss": 0.4112071990966797, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 1.0410544872283936, + "rewards/margins": 1.0919562578201294, + "rewards/rejected": -0.05090172961354256, + "step": 210 + }, + { + "epoch": 1.843010752688172, + "grad_norm": 1.8526785373687744, + "learning_rate": 1.3047619047619047e-06, + "logits/chosen": -5.290182113647461, + "logits/rejected": -5.226934909820557, + "logps/chosen": -376.863525390625, + "logps/rejected": -320.2511291503906, + "loss": 0.4023551940917969, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 1.0466766357421875, + "rewards/margins": 0.915771484375, + "rewards/rejected": 0.1309051215648651, + "step": 215 + }, + { + "epoch": 1.886021505376344, + "grad_norm": 1.6254379749298096, + "learning_rate": 1.2571428571428571e-06, + "logits/chosen": -5.433383464813232, + "logits/rejected": -5.424604892730713, + "logps/chosen": -276.48382568359375, + "logps/rejected": -254.852294921875, + "loss": 0.42627677917480467, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 1.1158783435821533, + "rewards/margins": 0.8330133557319641, + "rewards/rejected": 0.2828650176525116, + "step": 220 + }, + { + "epoch": 1.9290322580645163, + "grad_norm": 3.121758460998535, + "learning_rate": 1.2095238095238095e-06, + "logits/chosen": -5.485299110412598, + "logits/rejected": -5.462108612060547, + "logps/chosen": -276.477294921875, + "logps/rejected": -285.85516357421875, + "loss": 0.4259671688079834, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 0.8818387985229492, + "rewards/margins": 0.7649229764938354, + "rewards/rejected": 0.11691585928201675, + "step": 225 + }, + { + "epoch": 1.9720430107526883, + "grad_norm": 2.3801393508911133, + "learning_rate": 1.161904761904762e-06, + "logits/chosen": -5.211794376373291, + "logits/rejected": -4.747314929962158, + "logps/chosen": -416.1683654785156, + "logps/rejected": -365.8246765136719, + "loss": 0.3910404443740845, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 1.5805695056915283, + "rewards/margins": 1.2381842136383057, + "rewards/rejected": 0.34238511323928833, + "step": 230 + }, + { + "epoch": 2.0086021505376346, + "grad_norm": 1.0709431171417236, + "learning_rate": 1.1142857142857143e-06, + "logits/chosen": -5.255922794342041, + "logits/rejected": -5.191451549530029, + "logps/chosen": -329.1349182128906, + "logps/rejected": -281.3226623535156, + "loss": 0.4125385761260986, + "rewards/accuracies": 0.970588207244873, + "rewards/chosen": 1.2619037628173828, + "rewards/margins": 1.028172254562378, + "rewards/rejected": 0.2337314933538437, + "step": 235 + }, + { + "epoch": 2.0516129032258066, + "grad_norm": 1.131330966949463, + "learning_rate": 1.0666666666666667e-06, + "logits/chosen": -5.735346794128418, + "logits/rejected": -5.384338855743408, + "logps/chosen": -354.9601135253906, + "logps/rejected": -285.20147705078125, + "loss": 0.3143571138381958, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6434673070907593, + "rewards/margins": 1.7338101863861084, + "rewards/rejected": -0.09034281969070435, + "step": 240 + }, + { + "epoch": 2.0946236559139786, + "grad_norm": 1.3222469091415405, + "learning_rate": 1.019047619047619e-06, + "logits/chosen": -5.4371256828308105, + "logits/rejected": -5.41799259185791, + "logps/chosen": -318.0994567871094, + "logps/rejected": -296.23126220703125, + "loss": 0.3331931114196777, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9413619041442871, + "rewards/margins": 1.1318610906600952, + "rewards/rejected": -0.1904991865158081, + "step": 245 + }, + { + "epoch": 2.1376344086021506, + "grad_norm": 1.1309553384780884, + "learning_rate": 9.714285714285715e-07, + "logits/chosen": -5.32918643951416, + "logits/rejected": -5.228451251983643, + "logps/chosen": -386.4266662597656, + "logps/rejected": -299.82586669921875, + "loss": 0.30677978992462157, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 1.5374553203582764, + "rewards/margins": 1.2936350107192993, + "rewards/rejected": 0.24382023513317108, + "step": 250 + } + ], + "logging_steps": 5, + "max_steps": 351, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 50, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-9b-dpo/checkpoint-250/training_args.bin b/gemma-9b-dpo/checkpoint-250/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3dd4e898c665a87974fee402a2f65954466af4f7 --- /dev/null +++ b/gemma-9b-dpo/checkpoint-250/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74ee585106626f1196ffc9833586c5c11146fc384e5b33ed522cf45bab148032 +size 6097 diff --git a/gemma-9b-dpo/checkpoint-300/README.md b/gemma-9b-dpo/checkpoint-300/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a367db0a4fe66cceaf15eef8697f5b4480f48fd3 --- /dev/null +++ b/gemma-9b-dpo/checkpoint-300/README.md @@ -0,0 +1,209 @@ +--- +base_model: google/gemma-2-9b-it +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:google/gemma-2-9b-it +- dpo +- lora +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/gemma-9b-dpo/checkpoint-300/adapter_config.json b/gemma-9b-dpo/checkpoint-300/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c80c202b52eb25538c92a8e095abe785cdc6f749 --- /dev/null +++ b/gemma-9b-dpo/checkpoint-300/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2-9b-it", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj", + "v_proj", + "k_proj", + "up_proj", + "q_proj", + "gate_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-9b-dpo/checkpoint-300/adapter_model.safetensors b/gemma-9b-dpo/checkpoint-300/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dfbcf55be369d9320a59cd31b8ec658e14ca2866 --- /dev/null +++ b/gemma-9b-dpo/checkpoint-300/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e21b6453defe06ba4a33dae96d76276b1bd9208353290d1059906119f46daa19 +size 216151256 diff --git a/gemma-9b-dpo/checkpoint-300/chat_template.jinja b/gemma-9b-dpo/checkpoint-300/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..923ec253c8dbefbb41cf084db7251df41d000f6d --- /dev/null +++ b/gemma-9b-dpo/checkpoint-300/chat_template.jinja @@ -0,0 +1,4 @@ +{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + ' +' + message['content'] | trim + ' +' }}{% endfor %}{% if add_generation_prompt %}{{'model +'}}{% endif %} \ No newline at end of file diff --git a/gemma-9b-dpo/checkpoint-300/optimizer.pt b/gemma-9b-dpo/checkpoint-300/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..8e661a0c826faf106e285de22f7b916668b9c677 --- /dev/null +++ b/gemma-9b-dpo/checkpoint-300/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:301da9918c955541b5b5443cf2b0e1dbc61d89753faaf193b2d293ee84635b8e +size 110426453 diff --git a/gemma-9b-dpo/checkpoint-300/rng_state.pth b/gemma-9b-dpo/checkpoint-300/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..065f385e762194a148dec7fed295c58a3e7c17fa --- /dev/null +++ b/gemma-9b-dpo/checkpoint-300/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2534e434cd5abbb8f7668d3eab0549db0ef95d6a797a3efa86b712e8e32266a7 +size 14645 diff --git a/gemma-9b-dpo/checkpoint-300/scheduler.pt b/gemma-9b-dpo/checkpoint-300/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..06c1ac9ad37c9876fa52b3d82242258c37f5f177 --- /dev/null +++ b/gemma-9b-dpo/checkpoint-300/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:769360de325ba3a6aefdd25977fe32b70d770b4656d25cf6629829537177aba3 +size 1465 diff --git a/gemma-9b-dpo/checkpoint-300/tokenizer.json b/gemma-9b-dpo/checkpoint-300/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6523cc0616b64c563af913a417dfa7eb01549a2c --- /dev/null +++ b/gemma-9b-dpo/checkpoint-300/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:394ace002a144ac6ad5486387502f2d36f70c087310c3d907857240c76fcb36e +size 34362748 diff --git a/gemma-9b-dpo/checkpoint-300/tokenizer_config.json b/gemma-9b-dpo/checkpoint-300/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..eabc4ed8ab34751069a1970d61e616ce93e53880 --- /dev/null +++ b/gemma-9b-dpo/checkpoint-300/tokenizer_config.json @@ -0,0 +1,19 @@ +{ + "backend": "tokenizers", + "bos_token": "", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "extra_special_tokens": [ + "", + "" + ], + "is_local": false, + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-9b-dpo/checkpoint-300/trainer_state.json b/gemma-9b-dpo/checkpoint-300/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d771d41e0a74eb3d9cf95434f02b2718978ff2ed --- /dev/null +++ b/gemma-9b-dpo/checkpoint-300/trainer_state.json @@ -0,0 +1,934 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.567741935483871, + "eval_steps": 500, + "global_step": 300, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.043010752688172046, + "grad_norm": 1.6562875509262085, + "learning_rate": 3.333333333333333e-07, + "logits/chosen": -5.94815731048584, + "logits/rejected": -5.856410503387451, + "logps/chosen": -302.07794189453125, + "logps/rejected": -283.5802001953125, + "loss": 0.6852486610412598, + "rewards/accuracies": 0.375, + "rewards/chosen": 0.01349079143255949, + "rewards/margins": 0.017757166177034378, + "rewards/rejected": -0.00426637614145875, + "step": 5 + }, + { + "epoch": 0.08602150537634409, + "grad_norm": 1.7723957300186157, + "learning_rate": 7.5e-07, + "logits/chosen": -5.585428714752197, + "logits/rejected": -5.577895641326904, + "logps/chosen": -362.89239501953125, + "logps/rejected": -280.8010559082031, + "loss": 0.6844330310821534, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.018745478242635727, + "rewards/margins": 0.018739622086286545, + "rewards/rejected": 5.85438692723983e-06, + "step": 10 + }, + { + "epoch": 0.12903225806451613, + "grad_norm": 1.6630758047103882, + "learning_rate": 1.1666666666666668e-06, + "logits/chosen": -5.916059494018555, + "logits/rejected": -5.797668933868408, + "logps/chosen": -307.4602966308594, + "logps/rejected": -316.79803466796875, + "loss": 0.7070020198822021, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.008706416003406048, + "rewards/margins": -0.02138950116932392, + "rewards/rejected": 0.012683087959885597, + "step": 15 + }, + { + "epoch": 0.17204301075268819, + "grad_norm": 5.225494861602783, + "learning_rate": 1.5833333333333333e-06, + "logits/chosen": -5.818185329437256, + "logits/rejected": -5.638014793395996, + "logps/chosen": -329.3303527832031, + "logps/rejected": -292.2645568847656, + "loss": 0.6929943561553955, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": -0.044999174773693085, + "rewards/margins": 0.00216915225610137, + "rewards/rejected": -0.04716832935810089, + "step": 20 + }, + { + "epoch": 0.21505376344086022, + "grad_norm": 2.5229244232177734, + "learning_rate": 2e-06, + "logits/chosen": -5.8189592361450195, + "logits/rejected": -5.646462917327881, + "logps/chosen": -311.9922180175781, + "logps/rejected": -297.0098571777344, + "loss": 0.7002316951751709, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": 0.0544702522456646, + "rewards/margins": -0.012132339179515839, + "rewards/rejected": 0.06660258769989014, + "step": 25 + }, + { + "epoch": 0.25806451612903225, + "grad_norm": 1.6524324417114258, + "learning_rate": 2.4166666666666667e-06, + "logits/chosen": -5.696784496307373, + "logits/rejected": -5.6943678855896, + "logps/chosen": -350.2938537597656, + "logps/rejected": -322.43182373046875, + "loss": 0.699475908279419, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.0020792910363525152, + "rewards/margins": 0.02348965033888817, + "rewards/rejected": -0.025568943470716476, + "step": 30 + }, + { + "epoch": 0.3010752688172043, + "grad_norm": 9.075493812561035, + "learning_rate": 2.8333333333333335e-06, + "logits/chosen": -5.407717704772949, + "logits/rejected": -5.5642805099487305, + "logps/chosen": -391.3837585449219, + "logps/rejected": -333.9232482910156, + "loss": 0.6926415920257568, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": -0.026233959943056107, + "rewards/margins": 0.00582819152623415, + "rewards/rejected": -0.03206215053796768, + "step": 35 + }, + { + "epoch": 0.34408602150537637, + "grad_norm": 2.6282100677490234, + "learning_rate": 2.9714285714285716e-06, + "logits/chosen": -5.649778366088867, + "logits/rejected": -5.512002944946289, + "logps/chosen": -338.31048583984375, + "logps/rejected": -279.06536865234375, + "loss": 0.6739111423492432, + "rewards/accuracies": 0.625, + "rewards/chosen": 0.04578382521867752, + "rewards/margins": 0.042618148028850555, + "rewards/rejected": 0.0031656839419156313, + "step": 40 + }, + { + "epoch": 0.3870967741935484, + "grad_norm": 16.96689796447754, + "learning_rate": 2.923809523809524e-06, + "logits/chosen": -5.518254280090332, + "logits/rejected": -5.577255725860596, + "logps/chosen": -374.29449462890625, + "logps/rejected": -366.4861145019531, + "loss": 0.7162120819091797, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": 0.03416462987661362, + "rewards/margins": 0.0036566159687936306, + "rewards/rejected": 0.030508000403642654, + "step": 45 + }, + { + "epoch": 0.43010752688172044, + "grad_norm": 1.447816252708435, + "learning_rate": 2.8761904761904764e-06, + "logits/chosen": -5.831571102142334, + "logits/rejected": -5.802765846252441, + "logps/chosen": -316.0965881347656, + "logps/rejected": -269.0494384765625, + "loss": 0.6838803291320801, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.1265859305858612, + "rewards/margins": 0.026753634214401245, + "rewards/rejected": -0.15333956480026245, + "step": 50 + }, + { + "epoch": 0.4731182795698925, + "grad_norm": 2.174459457397461, + "learning_rate": 2.8285714285714288e-06, + "logits/chosen": -5.777364253997803, + "logits/rejected": -5.834509372711182, + "logps/chosen": -316.31390380859375, + "logps/rejected": -282.5376892089844, + "loss": 0.6832016944885254, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.01756533980369568, + "rewards/margins": 0.02713041380047798, + "rewards/rejected": -0.04469575732946396, + "step": 55 + }, + { + "epoch": 0.5161290322580645, + "grad_norm": 2.2876009941101074, + "learning_rate": 2.780952380952381e-06, + "logits/chosen": -5.92340087890625, + "logits/rejected": -5.846226692199707, + "logps/chosen": -235.49667358398438, + "logps/rejected": -267.7764892578125, + "loss": 0.6656109809875488, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.021378064528107643, + "rewards/margins": 0.06001085042953491, + "rewards/rejected": -0.038632798939943314, + "step": 60 + }, + { + "epoch": 0.5591397849462365, + "grad_norm": 3.45564603805542, + "learning_rate": 2.733333333333333e-06, + "logits/chosen": -5.650258541107178, + "logits/rejected": -5.6721577644348145, + "logps/chosen": -381.40655517578125, + "logps/rejected": -311.68865966796875, + "loss": 0.7090614795684814, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": 0.1167660504579544, + "rewards/margins": -0.0002812861348502338, + "rewards/rejected": 0.11704733222723007, + "step": 65 + }, + { + "epoch": 0.6021505376344086, + "grad_norm": 4.502689838409424, + "learning_rate": 2.685714285714286e-06, + "logits/chosen": -5.727735996246338, + "logits/rejected": -5.644078254699707, + "logps/chosen": -362.89886474609375, + "logps/rejected": -314.7126770019531, + "loss": 0.6596640110015869, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.07285688072443008, + "rewards/margins": 0.07270809262990952, + "rewards/rejected": 0.00014879256195854396, + "step": 70 + }, + { + "epoch": 0.6451612903225806, + "grad_norm": 2.2681005001068115, + "learning_rate": 2.638095238095238e-06, + "logits/chosen": -5.528594017028809, + "logits/rejected": -5.29849910736084, + "logps/chosen": -353.0726013183594, + "logps/rejected": -331.8568115234375, + "loss": 0.7387234687805175, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": 0.1685911864042282, + "rewards/margins": -0.04706493765115738, + "rewards/rejected": 0.2156561315059662, + "step": 75 + }, + { + "epoch": 0.6881720430107527, + "grad_norm": 3.0135886669158936, + "learning_rate": 2.5904761904761907e-06, + "logits/chosen": -5.8677144050598145, + "logits/rejected": -5.5384016036987305, + "logps/chosen": -326.17724609375, + "logps/rejected": -285.8974304199219, + "loss": 0.6047093391418457, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 0.19864055514335632, + "rewards/margins": 0.19678126275539398, + "rewards/rejected": 0.0018593042623251677, + "step": 80 + }, + { + "epoch": 0.7311827956989247, + "grad_norm": 1.7684988975524902, + "learning_rate": 2.5428571428571427e-06, + "logits/chosen": -5.413943290710449, + "logits/rejected": -5.427316665649414, + "logps/chosen": -372.1170654296875, + "logps/rejected": -312.30279541015625, + "loss": 0.651799201965332, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": 0.179380863904953, + "rewards/margins": 0.09846550226211548, + "rewards/rejected": 0.08091535419225693, + "step": 85 + }, + { + "epoch": 0.7741935483870968, + "grad_norm": 2.4072341918945312, + "learning_rate": 2.4952380952380955e-06, + "logits/chosen": -5.4685492515563965, + "logits/rejected": -5.51275634765625, + "logps/chosen": -345.37188720703125, + "logps/rejected": -273.87261962890625, + "loss": 0.6362700462341309, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.33122482895851135, + "rewards/margins": 0.1338074505329132, + "rewards/rejected": 0.19741736352443695, + "step": 90 + }, + { + "epoch": 0.8172043010752689, + "grad_norm": 1.745698094367981, + "learning_rate": 2.4476190476190475e-06, + "logits/chosen": -5.6481757164001465, + "logits/rejected": -5.4260029792785645, + "logps/chosen": -337.11767578125, + "logps/rejected": -263.3458251953125, + "loss": 0.6222721099853515, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 0.25297099351882935, + "rewards/margins": 0.1701255589723587, + "rewards/rejected": 0.08284540474414825, + "step": 95 + }, + { + "epoch": 0.8602150537634409, + "grad_norm": 1.4592756032943726, + "learning_rate": 2.4000000000000003e-06, + "logits/chosen": -5.402789115905762, + "logits/rejected": -5.280846118927002, + "logps/chosen": -304.55316162109375, + "logps/rejected": -314.95306396484375, + "loss": 0.6404934883117676, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.29103565216064453, + "rewards/margins": 0.12328717857599258, + "rewards/rejected": 0.16774848103523254, + "step": 100 + }, + { + "epoch": 0.9032258064516129, + "grad_norm": 7.651180744171143, + "learning_rate": 2.3523809523809523e-06, + "logits/chosen": -5.333284854888916, + "logits/rejected": -5.209356307983398, + "logps/chosen": -385.76483154296875, + "logps/rejected": -317.0154724121094, + "loss": 0.5562876224517822, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 0.5905097126960754, + "rewards/margins": 0.39645156264305115, + "rewards/rejected": 0.19405809044837952, + "step": 105 + }, + { + "epoch": 0.946236559139785, + "grad_norm": 2.2162137031555176, + "learning_rate": 2.304761904761905e-06, + "logits/chosen": -5.9164533615112305, + "logits/rejected": -5.660351276397705, + "logps/chosen": -362.57720947265625, + "logps/rejected": -328.386962890625, + "loss": 0.5672832489013672, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 0.4752024710178375, + "rewards/margins": 0.3182791471481323, + "rewards/rejected": 0.15692326426506042, + "step": 110 + }, + { + "epoch": 0.989247311827957, + "grad_norm": 2.102505683898926, + "learning_rate": 2.257142857142857e-06, + "logits/chosen": -5.367539882659912, + "logits/rejected": -5.304169654846191, + "logps/chosen": -389.9139404296875, + "logps/rejected": -303.767822265625, + "loss": 0.563088321685791, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 0.9107279777526855, + "rewards/margins": 0.7586480379104614, + "rewards/rejected": 0.15207989513874054, + "step": 115 + }, + { + "epoch": 1.0258064516129033, + "grad_norm": 2.9593698978424072, + "learning_rate": 2.20952380952381e-06, + "logits/chosen": -5.506618976593018, + "logits/rejected": -5.349832057952881, + "logps/chosen": -299.4593505859375, + "logps/rejected": -229.88784790039062, + "loss": 0.5665022850036621, + "rewards/accuracies": 0.8823529481887817, + "rewards/chosen": 0.4371771514415741, + "rewards/margins": 0.3876599073410034, + "rewards/rejected": 0.04951724037528038, + "step": 120 + }, + { + "epoch": 1.0688172043010753, + "grad_norm": 2.5988082885742188, + "learning_rate": 2.161904761904762e-06, + "logits/chosen": -5.542575836181641, + "logits/rejected": -5.428658485412598, + "logps/chosen": -353.8345642089844, + "logps/rejected": -312.9586486816406, + "loss": 0.5137276172637939, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 0.6325557827949524, + "rewards/margins": 0.44065460562705994, + "rewards/rejected": 0.19190113246440887, + "step": 125 + }, + { + "epoch": 1.1118279569892473, + "grad_norm": 1.6948280334472656, + "learning_rate": 2.1142857142857147e-06, + "logits/chosen": -5.847611427307129, + "logits/rejected": -5.498036861419678, + "logps/chosen": -300.35577392578125, + "logps/rejected": -311.05126953125, + "loss": 0.5773736953735351, + "rewards/accuracies": 0.875, + "rewards/chosen": 0.6476074457168579, + "rewards/margins": 0.37814000248908997, + "rewards/rejected": 0.2694675028324127, + "step": 130 + }, + { + "epoch": 1.1548387096774193, + "grad_norm": 1.372768759727478, + "learning_rate": 2.0666666666666666e-06, + "logits/chosen": -5.619741916656494, + "logits/rejected": -5.618372917175293, + "logps/chosen": -367.06732177734375, + "logps/rejected": -301.2478332519531, + "loss": 0.46464052200317385, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 0.8353813290596008, + "rewards/margins": 0.6073431968688965, + "rewards/rejected": 0.22803807258605957, + "step": 135 + }, + { + "epoch": 1.1978494623655913, + "grad_norm": 2.498854637145996, + "learning_rate": 2.0190476190476195e-06, + "logits/chosen": -5.195496559143066, + "logits/rejected": -4.919422149658203, + "logps/chosen": -333.09979248046875, + "logps/rejected": -317.78167724609375, + "loss": 0.5423533916473389, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 0.46827277541160583, + "rewards/margins": 0.42974653840065, + "rewards/rejected": 0.0385262668132782, + "step": 140 + }, + { + "epoch": 1.2408602150537635, + "grad_norm": 1.451978087425232, + "learning_rate": 1.9714285714285714e-06, + "logits/chosen": -5.425684452056885, + "logits/rejected": -5.322096824645996, + "logps/chosen": -314.4623107910156, + "logps/rejected": -271.85479736328125, + "loss": 0.4632129192352295, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 0.9266977310180664, + "rewards/margins": 0.672536313533783, + "rewards/rejected": 0.25416144728660583, + "step": 145 + }, + { + "epoch": 1.2838709677419355, + "grad_norm": 2.0581016540527344, + "learning_rate": 1.923809523809524e-06, + "logits/chosen": -4.949021339416504, + "logits/rejected": -5.033829212188721, + "logps/chosen": -356.3919982910156, + "logps/rejected": -330.82720947265625, + "loss": 0.4621857166290283, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 0.7969595789909363, + "rewards/margins": 0.6875919103622437, + "rewards/rejected": 0.10936765372753143, + "step": 150 + }, + { + "epoch": 1.3268817204301075, + "grad_norm": 1.6490590572357178, + "learning_rate": 1.8761904761904762e-06, + "logits/chosen": -5.267385005950928, + "logits/rejected": -5.265533924102783, + "logps/chosen": -297.99224853515625, + "logps/rejected": -326.91339111328125, + "loss": 0.49877166748046875, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 0.7068697810173035, + "rewards/margins": 0.6336223483085632, + "rewards/rejected": 0.07324743270874023, + "step": 155 + }, + { + "epoch": 1.3698924731182796, + "grad_norm": 1.483849048614502, + "learning_rate": 1.8285714285714288e-06, + "logits/chosen": -5.356790065765381, + "logits/rejected": -5.103802680969238, + "logps/chosen": -294.2845153808594, + "logps/rejected": -274.7852783203125, + "loss": 0.49851350784301757, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 0.6102481484413147, + "rewards/margins": 0.5510420203208923, + "rewards/rejected": 0.05920610576868057, + "step": 160 + }, + { + "epoch": 1.4129032258064516, + "grad_norm": 1.3165900707244873, + "learning_rate": 1.780952380952381e-06, + "logits/chosen": -5.431517124176025, + "logits/rejected": -5.376145362854004, + "logps/chosen": -325.1388244628906, + "logps/rejected": -327.6669616699219, + "loss": 0.4309373378753662, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 0.6497762799263, + "rewards/margins": 0.9170367121696472, + "rewards/rejected": -0.2672604024410248, + "step": 165 + }, + { + "epoch": 1.4559139784946238, + "grad_norm": 1.5377726554870605, + "learning_rate": 1.7333333333333332e-06, + "logits/chosen": -5.27555513381958, + "logits/rejected": -5.022242069244385, + "logps/chosen": -340.17779541015625, + "logps/rejected": -288.0126647949219, + "loss": 0.4575087547302246, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 0.5721868276596069, + "rewards/margins": 0.6390407085418701, + "rewards/rejected": -0.06685388088226318, + "step": 170 + }, + { + "epoch": 1.4989247311827958, + "grad_norm": 2.0849504470825195, + "learning_rate": 1.6857142857142858e-06, + "logits/chosen": -5.070017337799072, + "logits/rejected": -5.327781677246094, + "logps/chosen": -360.60809326171875, + "logps/rejected": -292.8214416503906, + "loss": 0.45406789779663087, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 0.748975396156311, + "rewards/margins": 0.6336467266082764, + "rewards/rejected": 0.11532865464687347, + "step": 175 + }, + { + "epoch": 1.5419354838709678, + "grad_norm": 1.4946179389953613, + "learning_rate": 1.638095238095238e-06, + "logits/chosen": -5.378829002380371, + "logits/rejected": -5.2273030281066895, + "logps/chosen": -369.40679931640625, + "logps/rejected": -310.7534484863281, + "loss": 0.40073528289794924, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9242110252380371, + "rewards/margins": 0.8946256637573242, + "rewards/rejected": 0.029585417360067368, + "step": 180 + }, + { + "epoch": 1.5849462365591398, + "grad_norm": 1.9285597801208496, + "learning_rate": 1.5904761904761906e-06, + "logits/chosen": -5.534226417541504, + "logits/rejected": -5.36181640625, + "logps/chosen": -285.0791931152344, + "logps/rejected": -258.01654052734375, + "loss": 0.44419097900390625, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 0.7649332284927368, + "rewards/margins": 0.6902278661727905, + "rewards/rejected": 0.07470535486936569, + "step": 185 + }, + { + "epoch": 1.6279569892473118, + "grad_norm": 1.4278947114944458, + "learning_rate": 1.5428571428571428e-06, + "logits/chosen": -5.1719889640808105, + "logits/rejected": -5.186745643615723, + "logps/chosen": -366.37286376953125, + "logps/rejected": -310.18048095703125, + "loss": 0.37736806869506834, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4671168327331543, + "rewards/margins": 1.110413908958435, + "rewards/rejected": 0.3567030429840088, + "step": 190 + }, + { + "epoch": 1.6709677419354838, + "grad_norm": 1.7930585145950317, + "learning_rate": 1.4952380952380954e-06, + "logits/chosen": -5.458104133605957, + "logits/rejected": -5.514155864715576, + "logps/chosen": -288.04083251953125, + "logps/rejected": -311.49090576171875, + "loss": 0.43456592559814455, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 0.7255552411079407, + "rewards/margins": 0.843337893486023, + "rewards/rejected": -0.11778266727924347, + "step": 195 + }, + { + "epoch": 1.7139784946236558, + "grad_norm": 1.3665006160736084, + "learning_rate": 1.4476190476190478e-06, + "logits/chosen": -5.221610069274902, + "logits/rejected": -5.051304340362549, + "logps/chosen": -379.436279296875, + "logps/rejected": -294.8427734375, + "loss": 0.3780463218688965, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 1.0199247598648071, + "rewards/margins": 1.0175530910491943, + "rewards/rejected": 0.0023716867435723543, + "step": 200 + }, + { + "epoch": 1.7569892473118278, + "grad_norm": 1.2653045654296875, + "learning_rate": 1.4000000000000001e-06, + "logits/chosen": -5.128909111022949, + "logits/rejected": -4.9968366622924805, + "logps/chosen": -374.5747375488281, + "logps/rejected": -310.3750915527344, + "loss": 0.42014646530151367, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 1.18593430519104, + "rewards/margins": 1.1055948734283447, + "rewards/rejected": 0.08033928275108337, + "step": 205 + }, + { + "epoch": 1.8, + "grad_norm": 1.0835349559783936, + "learning_rate": 1.3523809523809525e-06, + "logits/chosen": -5.398374557495117, + "logits/rejected": -5.446703910827637, + "logps/chosen": -332.9021911621094, + "logps/rejected": -307.1310119628906, + "loss": 0.4112071990966797, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 1.0410544872283936, + "rewards/margins": 1.0919562578201294, + "rewards/rejected": -0.05090172961354256, + "step": 210 + }, + { + "epoch": 1.843010752688172, + "grad_norm": 1.8526785373687744, + "learning_rate": 1.3047619047619047e-06, + "logits/chosen": -5.290182113647461, + "logits/rejected": -5.226934909820557, + "logps/chosen": -376.863525390625, + "logps/rejected": -320.2511291503906, + "loss": 0.4023551940917969, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 1.0466766357421875, + "rewards/margins": 0.915771484375, + "rewards/rejected": 0.1309051215648651, + "step": 215 + }, + { + "epoch": 1.886021505376344, + "grad_norm": 1.6254379749298096, + "learning_rate": 1.2571428571428571e-06, + "logits/chosen": -5.433383464813232, + "logits/rejected": -5.424604892730713, + "logps/chosen": -276.48382568359375, + "logps/rejected": -254.852294921875, + "loss": 0.42627677917480467, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 1.1158783435821533, + "rewards/margins": 0.8330133557319641, + "rewards/rejected": 0.2828650176525116, + "step": 220 + }, + { + "epoch": 1.9290322580645163, + "grad_norm": 3.121758460998535, + "learning_rate": 1.2095238095238095e-06, + "logits/chosen": -5.485299110412598, + "logits/rejected": -5.462108612060547, + "logps/chosen": -276.477294921875, + "logps/rejected": -285.85516357421875, + "loss": 0.4259671688079834, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 0.8818387985229492, + "rewards/margins": 0.7649229764938354, + "rewards/rejected": 0.11691585928201675, + "step": 225 + }, + { + "epoch": 1.9720430107526883, + "grad_norm": 2.3801393508911133, + "learning_rate": 1.161904761904762e-06, + "logits/chosen": -5.211794376373291, + "logits/rejected": -4.747314929962158, + "logps/chosen": -416.1683654785156, + "logps/rejected": -365.8246765136719, + "loss": 0.3910404443740845, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 1.5805695056915283, + "rewards/margins": 1.2381842136383057, + "rewards/rejected": 0.34238511323928833, + "step": 230 + }, + { + "epoch": 2.0086021505376346, + "grad_norm": 1.0709431171417236, + "learning_rate": 1.1142857142857143e-06, + "logits/chosen": -5.255922794342041, + "logits/rejected": -5.191451549530029, + "logps/chosen": -329.1349182128906, + "logps/rejected": -281.3226623535156, + "loss": 0.4125385761260986, + "rewards/accuracies": 0.970588207244873, + "rewards/chosen": 1.2619037628173828, + "rewards/margins": 1.028172254562378, + "rewards/rejected": 0.2337314933538437, + "step": 235 + }, + { + "epoch": 2.0516129032258066, + "grad_norm": 1.131330966949463, + "learning_rate": 1.0666666666666667e-06, + "logits/chosen": -5.735346794128418, + "logits/rejected": -5.384338855743408, + "logps/chosen": -354.9601135253906, + "logps/rejected": -285.20147705078125, + "loss": 0.3143571138381958, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6434673070907593, + "rewards/margins": 1.7338101863861084, + "rewards/rejected": -0.09034281969070435, + "step": 240 + }, + { + "epoch": 2.0946236559139786, + "grad_norm": 1.3222469091415405, + "learning_rate": 1.019047619047619e-06, + "logits/chosen": -5.4371256828308105, + "logits/rejected": -5.41799259185791, + "logps/chosen": -318.0994567871094, + "logps/rejected": -296.23126220703125, + "loss": 0.3331931114196777, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9413619041442871, + "rewards/margins": 1.1318610906600952, + "rewards/rejected": -0.1904991865158081, + "step": 245 + }, + { + "epoch": 2.1376344086021506, + "grad_norm": 1.1309553384780884, + "learning_rate": 9.714285714285715e-07, + "logits/chosen": -5.32918643951416, + "logits/rejected": -5.228451251983643, + "logps/chosen": -386.4266662597656, + "logps/rejected": -299.82586669921875, + "loss": 0.30677978992462157, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 1.5374553203582764, + "rewards/margins": 1.2936350107192993, + "rewards/rejected": 0.24382023513317108, + "step": 250 + }, + { + "epoch": 2.1806451612903226, + "grad_norm": 1.2125121355056763, + "learning_rate": 9.238095238095239e-07, + "logits/chosen": -5.294185638427734, + "logits/rejected": -5.28645658493042, + "logps/chosen": -297.06683349609375, + "logps/rejected": -259.61456298828125, + "loss": 0.35644917488098143, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 1.0588313341140747, + "rewards/margins": 0.9543617367744446, + "rewards/rejected": 0.10446955263614655, + "step": 255 + }, + { + "epoch": 2.2236559139784946, + "grad_norm": 1.6852556467056274, + "learning_rate": 8.761904761904763e-07, + "logits/chosen": -5.6651225090026855, + "logits/rejected": -5.341966152191162, + "logps/chosen": -343.6695556640625, + "logps/rejected": -288.97869873046875, + "loss": 0.3159534454345703, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 1.1598581075668335, + "rewards/margins": 1.1884613037109375, + "rewards/rejected": -0.02860334888100624, + "step": 260 + }, + { + "epoch": 2.2666666666666666, + "grad_norm": 1.3867968320846558, + "learning_rate": 8.285714285714287e-07, + "logits/chosen": -5.487370491027832, + "logits/rejected": -5.273520469665527, + "logps/chosen": -370.5751037597656, + "logps/rejected": -309.1852722167969, + "loss": 0.2891366720199585, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5521111488342285, + "rewards/margins": 1.3738138675689697, + "rewards/rejected": 0.17829741537570953, + "step": 265 + }, + { + "epoch": 2.3096774193548386, + "grad_norm": 1.3379493951797485, + "learning_rate": 7.80952380952381e-07, + "logits/chosen": -5.195990085601807, + "logits/rejected": -5.239910125732422, + "logps/chosen": -313.62939453125, + "logps/rejected": -252.7853546142578, + "loss": 0.32800557613372805, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3296881914138794, + "rewards/margins": 1.1511423587799072, + "rewards/rejected": 0.1785457581281662, + "step": 270 + }, + { + "epoch": 2.3526881720430106, + "grad_norm": 1.2220584154129028, + "learning_rate": 7.333333333333333e-07, + "logits/chosen": -5.423740386962891, + "logits/rejected": -5.007106781005859, + "logps/chosen": -338.8307189941406, + "logps/rejected": -317.20355224609375, + "loss": 0.2776246786117554, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2019110918045044, + "rewards/margins": 1.4179589748382568, + "rewards/rejected": -0.21604792773723602, + "step": 275 + }, + { + "epoch": 2.3956989247311826, + "grad_norm": 1.2976250648498535, + "learning_rate": 6.857142857142857e-07, + "logits/chosen": -5.235350608825684, + "logits/rejected": -5.151061534881592, + "logps/chosen": -311.9940185546875, + "logps/rejected": -273.13653564453125, + "loss": 0.29607300758361815, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1353685855865479, + "rewards/margins": 1.4848861694335938, + "rewards/rejected": -0.34951773285865784, + "step": 280 + }, + { + "epoch": 2.4387096774193546, + "grad_norm": 1.5546081066131592, + "learning_rate": 6.380952380952381e-07, + "logits/chosen": -5.2240986824035645, + "logits/rejected": -5.1968793869018555, + "logps/chosen": -343.4505920410156, + "logps/rejected": -266.83563232421875, + "loss": 0.3283820152282715, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.146566390991211, + "rewards/margins": 1.1784019470214844, + "rewards/rejected": -0.03183561936020851, + "step": 285 + }, + { + "epoch": 2.481720430107527, + "grad_norm": 1.9226080179214478, + "learning_rate": 5.904761904761905e-07, + "logits/chosen": -5.224617958068848, + "logits/rejected": -5.07640266418457, + "logps/chosen": -368.66729736328125, + "logps/rejected": -335.21917724609375, + "loss": 0.31503658294677733, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 1.1312525272369385, + "rewards/margins": 1.1962707042694092, + "rewards/rejected": -0.0650181695818901, + "step": 290 + }, + { + "epoch": 2.524731182795699, + "grad_norm": 1.0341100692749023, + "learning_rate": 5.428571428571429e-07, + "logits/chosen": -5.5554656982421875, + "logits/rejected": -5.393430709838867, + "logps/chosen": -295.96405029296875, + "logps/rejected": -290.5457458496094, + "loss": 0.2795746326446533, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 1.1615794897079468, + "rewards/margins": 1.2998874187469482, + "rewards/rejected": -0.1383078545331955, + "step": 295 + }, + { + "epoch": 2.567741935483871, + "grad_norm": 1.2715569734573364, + "learning_rate": 4.952380952380952e-07, + "logits/chosen": -5.555523872375488, + "logits/rejected": -5.402945518493652, + "logps/chosen": -335.7245788574219, + "logps/rejected": -330.8264465332031, + "loss": 0.2791964292526245, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0191905498504639, + "rewards/margins": 1.5995515584945679, + "rewards/rejected": -0.580361008644104, + "step": 300 + } + ], + "logging_steps": 5, + "max_steps": 351, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 50, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-9b-dpo/checkpoint-300/training_args.bin b/gemma-9b-dpo/checkpoint-300/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3dd4e898c665a87974fee402a2f65954466af4f7 --- /dev/null +++ b/gemma-9b-dpo/checkpoint-300/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74ee585106626f1196ffc9833586c5c11146fc384e5b33ed522cf45bab148032 +size 6097 diff --git a/gemma-9b-dpo/checkpoint-350/README.md b/gemma-9b-dpo/checkpoint-350/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a367db0a4fe66cceaf15eef8697f5b4480f48fd3 --- /dev/null +++ b/gemma-9b-dpo/checkpoint-350/README.md @@ -0,0 +1,209 @@ +--- +base_model: google/gemma-2-9b-it +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:google/gemma-2-9b-it +- dpo +- lora +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/gemma-9b-dpo/checkpoint-350/adapter_config.json b/gemma-9b-dpo/checkpoint-350/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c80c202b52eb25538c92a8e095abe785cdc6f749 --- /dev/null +++ b/gemma-9b-dpo/checkpoint-350/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2-9b-it", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj", + "v_proj", + "k_proj", + "up_proj", + "q_proj", + "gate_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-9b-dpo/checkpoint-350/adapter_model.safetensors b/gemma-9b-dpo/checkpoint-350/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9d3299168efdcdd73613a5b6ba7f9c396e76604b --- /dev/null +++ b/gemma-9b-dpo/checkpoint-350/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6786b26f32fad794069eb6761d9c1c979225858f4b9b4bca34e644d97e39667 +size 216151256 diff --git a/gemma-9b-dpo/checkpoint-350/chat_template.jinja b/gemma-9b-dpo/checkpoint-350/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..923ec253c8dbefbb41cf084db7251df41d000f6d --- /dev/null +++ b/gemma-9b-dpo/checkpoint-350/chat_template.jinja @@ -0,0 +1,4 @@ +{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + ' +' + message['content'] | trim + ' +' }}{% endfor %}{% if add_generation_prompt %}{{'model +'}}{% endif %} \ No newline at end of file diff --git a/gemma-9b-dpo/checkpoint-350/optimizer.pt b/gemma-9b-dpo/checkpoint-350/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..2e86c8349a2194810e5dde7d9e396822ed03bc15 --- /dev/null +++ b/gemma-9b-dpo/checkpoint-350/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59a92ff8a4d99ebf521e204e2b80f03df9d3538562b1597e2bc5ca214bb786e0 +size 110426453 diff --git a/gemma-9b-dpo/checkpoint-350/rng_state.pth b/gemma-9b-dpo/checkpoint-350/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..065f385e762194a148dec7fed295c58a3e7c17fa --- /dev/null +++ b/gemma-9b-dpo/checkpoint-350/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2534e434cd5abbb8f7668d3eab0549db0ef95d6a797a3efa86b712e8e32266a7 +size 14645 diff --git a/gemma-9b-dpo/checkpoint-350/scheduler.pt b/gemma-9b-dpo/checkpoint-350/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..8bb082ba04bae8465da3de7c80b46611ea6fe4b5 --- /dev/null +++ b/gemma-9b-dpo/checkpoint-350/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d0f0284191a433d821c012504d016a3b8f78c581693d0930ed33730141933c2 +size 1465 diff --git a/gemma-9b-dpo/checkpoint-350/tokenizer.json b/gemma-9b-dpo/checkpoint-350/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6523cc0616b64c563af913a417dfa7eb01549a2c --- /dev/null +++ b/gemma-9b-dpo/checkpoint-350/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:394ace002a144ac6ad5486387502f2d36f70c087310c3d907857240c76fcb36e +size 34362748 diff --git a/gemma-9b-dpo/checkpoint-350/tokenizer_config.json b/gemma-9b-dpo/checkpoint-350/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..eabc4ed8ab34751069a1970d61e616ce93e53880 --- /dev/null +++ b/gemma-9b-dpo/checkpoint-350/tokenizer_config.json @@ -0,0 +1,19 @@ +{ + "backend": "tokenizers", + "bos_token": "", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "extra_special_tokens": [ + "", + "" + ], + "is_local": false, + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-9b-dpo/checkpoint-350/trainer_state.json b/gemma-9b-dpo/checkpoint-350/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..02f225b001b5bd0f0d46ad211ac111ee7447d68c --- /dev/null +++ b/gemma-9b-dpo/checkpoint-350/trainer_state.json @@ -0,0 +1,1084 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.9978494623655916, + "eval_steps": 500, + "global_step": 350, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.043010752688172046, + "grad_norm": 1.6562875509262085, + "learning_rate": 3.333333333333333e-07, + "logits/chosen": -5.94815731048584, + "logits/rejected": -5.856410503387451, + "logps/chosen": -302.07794189453125, + "logps/rejected": -283.5802001953125, + "loss": 0.6852486610412598, + "rewards/accuracies": 0.375, + "rewards/chosen": 0.01349079143255949, + "rewards/margins": 0.017757166177034378, + "rewards/rejected": -0.00426637614145875, + "step": 5 + }, + { + "epoch": 0.08602150537634409, + "grad_norm": 1.7723957300186157, + "learning_rate": 7.5e-07, + "logits/chosen": -5.585428714752197, + "logits/rejected": -5.577895641326904, + "logps/chosen": -362.89239501953125, + "logps/rejected": -280.8010559082031, + "loss": 0.6844330310821534, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.018745478242635727, + "rewards/margins": 0.018739622086286545, + "rewards/rejected": 5.85438692723983e-06, + "step": 10 + }, + { + "epoch": 0.12903225806451613, + "grad_norm": 1.6630758047103882, + "learning_rate": 1.1666666666666668e-06, + "logits/chosen": -5.916059494018555, + "logits/rejected": -5.797668933868408, + "logps/chosen": -307.4602966308594, + "logps/rejected": -316.79803466796875, + "loss": 0.7070020198822021, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.008706416003406048, + "rewards/margins": -0.02138950116932392, + "rewards/rejected": 0.012683087959885597, + "step": 15 + }, + { + "epoch": 0.17204301075268819, + "grad_norm": 5.225494861602783, + "learning_rate": 1.5833333333333333e-06, + "logits/chosen": -5.818185329437256, + "logits/rejected": -5.638014793395996, + "logps/chosen": -329.3303527832031, + "logps/rejected": -292.2645568847656, + "loss": 0.6929943561553955, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": -0.044999174773693085, + "rewards/margins": 0.00216915225610137, + "rewards/rejected": -0.04716832935810089, + "step": 20 + }, + { + "epoch": 0.21505376344086022, + "grad_norm": 2.5229244232177734, + "learning_rate": 2e-06, + "logits/chosen": -5.8189592361450195, + "logits/rejected": -5.646462917327881, + "logps/chosen": -311.9922180175781, + "logps/rejected": -297.0098571777344, + "loss": 0.7002316951751709, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": 0.0544702522456646, + "rewards/margins": -0.012132339179515839, + "rewards/rejected": 0.06660258769989014, + "step": 25 + }, + { + "epoch": 0.25806451612903225, + "grad_norm": 1.6524324417114258, + "learning_rate": 2.4166666666666667e-06, + "logits/chosen": -5.696784496307373, + "logits/rejected": -5.6943678855896, + "logps/chosen": -350.2938537597656, + "logps/rejected": -322.43182373046875, + "loss": 0.699475908279419, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.0020792910363525152, + "rewards/margins": 0.02348965033888817, + "rewards/rejected": -0.025568943470716476, + "step": 30 + }, + { + "epoch": 0.3010752688172043, + "grad_norm": 9.075493812561035, + "learning_rate": 2.8333333333333335e-06, + "logits/chosen": -5.407717704772949, + "logits/rejected": -5.5642805099487305, + "logps/chosen": -391.3837585449219, + "logps/rejected": -333.9232482910156, + "loss": 0.6926415920257568, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": -0.026233959943056107, + "rewards/margins": 0.00582819152623415, + "rewards/rejected": -0.03206215053796768, + "step": 35 + }, + { + "epoch": 0.34408602150537637, + "grad_norm": 2.6282100677490234, + "learning_rate": 2.9714285714285716e-06, + "logits/chosen": -5.649778366088867, + "logits/rejected": -5.512002944946289, + "logps/chosen": -338.31048583984375, + "logps/rejected": -279.06536865234375, + "loss": 0.6739111423492432, + "rewards/accuracies": 0.625, + "rewards/chosen": 0.04578382521867752, + "rewards/margins": 0.042618148028850555, + "rewards/rejected": 0.0031656839419156313, + "step": 40 + }, + { + "epoch": 0.3870967741935484, + "grad_norm": 16.96689796447754, + "learning_rate": 2.923809523809524e-06, + "logits/chosen": -5.518254280090332, + "logits/rejected": -5.577255725860596, + "logps/chosen": -374.29449462890625, + "logps/rejected": -366.4861145019531, + "loss": 0.7162120819091797, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": 0.03416462987661362, + "rewards/margins": 0.0036566159687936306, + "rewards/rejected": 0.030508000403642654, + "step": 45 + }, + { + "epoch": 0.43010752688172044, + "grad_norm": 1.447816252708435, + "learning_rate": 2.8761904761904764e-06, + "logits/chosen": -5.831571102142334, + "logits/rejected": -5.802765846252441, + "logps/chosen": -316.0965881347656, + "logps/rejected": -269.0494384765625, + "loss": 0.6838803291320801, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.1265859305858612, + "rewards/margins": 0.026753634214401245, + "rewards/rejected": -0.15333956480026245, + "step": 50 + }, + { + "epoch": 0.4731182795698925, + "grad_norm": 2.174459457397461, + "learning_rate": 2.8285714285714288e-06, + "logits/chosen": -5.777364253997803, + "logits/rejected": -5.834509372711182, + "logps/chosen": -316.31390380859375, + "logps/rejected": -282.5376892089844, + "loss": 0.6832016944885254, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.01756533980369568, + "rewards/margins": 0.02713041380047798, + "rewards/rejected": -0.04469575732946396, + "step": 55 + }, + { + "epoch": 0.5161290322580645, + "grad_norm": 2.2876009941101074, + "learning_rate": 2.780952380952381e-06, + "logits/chosen": -5.92340087890625, + "logits/rejected": -5.846226692199707, + "logps/chosen": -235.49667358398438, + "logps/rejected": -267.7764892578125, + "loss": 0.6656109809875488, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.021378064528107643, + "rewards/margins": 0.06001085042953491, + "rewards/rejected": -0.038632798939943314, + "step": 60 + }, + { + "epoch": 0.5591397849462365, + "grad_norm": 3.45564603805542, + "learning_rate": 2.733333333333333e-06, + "logits/chosen": -5.650258541107178, + "logits/rejected": -5.6721577644348145, + "logps/chosen": -381.40655517578125, + "logps/rejected": -311.68865966796875, + "loss": 0.7090614795684814, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": 0.1167660504579544, + "rewards/margins": -0.0002812861348502338, + "rewards/rejected": 0.11704733222723007, + "step": 65 + }, + { + "epoch": 0.6021505376344086, + "grad_norm": 4.502689838409424, + "learning_rate": 2.685714285714286e-06, + "logits/chosen": -5.727735996246338, + "logits/rejected": -5.644078254699707, + "logps/chosen": -362.89886474609375, + "logps/rejected": -314.7126770019531, + "loss": 0.6596640110015869, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.07285688072443008, + "rewards/margins": 0.07270809262990952, + "rewards/rejected": 0.00014879256195854396, + "step": 70 + }, + { + "epoch": 0.6451612903225806, + "grad_norm": 2.2681005001068115, + "learning_rate": 2.638095238095238e-06, + "logits/chosen": -5.528594017028809, + "logits/rejected": -5.29849910736084, + "logps/chosen": -353.0726013183594, + "logps/rejected": -331.8568115234375, + "loss": 0.7387234687805175, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": 0.1685911864042282, + "rewards/margins": -0.04706493765115738, + "rewards/rejected": 0.2156561315059662, + "step": 75 + }, + { + "epoch": 0.6881720430107527, + "grad_norm": 3.0135886669158936, + "learning_rate": 2.5904761904761907e-06, + "logits/chosen": -5.8677144050598145, + "logits/rejected": -5.5384016036987305, + "logps/chosen": -326.17724609375, + "logps/rejected": -285.8974304199219, + "loss": 0.6047093391418457, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 0.19864055514335632, + "rewards/margins": 0.19678126275539398, + "rewards/rejected": 0.0018593042623251677, + "step": 80 + }, + { + "epoch": 0.7311827956989247, + "grad_norm": 1.7684988975524902, + "learning_rate": 2.5428571428571427e-06, + "logits/chosen": -5.413943290710449, + "logits/rejected": -5.427316665649414, + "logps/chosen": -372.1170654296875, + "logps/rejected": -312.30279541015625, + "loss": 0.651799201965332, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": 0.179380863904953, + "rewards/margins": 0.09846550226211548, + "rewards/rejected": 0.08091535419225693, + "step": 85 + }, + { + "epoch": 0.7741935483870968, + "grad_norm": 2.4072341918945312, + "learning_rate": 2.4952380952380955e-06, + "logits/chosen": -5.4685492515563965, + "logits/rejected": -5.51275634765625, + "logps/chosen": -345.37188720703125, + "logps/rejected": -273.87261962890625, + "loss": 0.6362700462341309, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.33122482895851135, + "rewards/margins": 0.1338074505329132, + "rewards/rejected": 0.19741736352443695, + "step": 90 + }, + { + "epoch": 0.8172043010752689, + "grad_norm": 1.745698094367981, + "learning_rate": 2.4476190476190475e-06, + "logits/chosen": -5.6481757164001465, + "logits/rejected": -5.4260029792785645, + "logps/chosen": -337.11767578125, + "logps/rejected": -263.3458251953125, + "loss": 0.6222721099853515, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 0.25297099351882935, + "rewards/margins": 0.1701255589723587, + "rewards/rejected": 0.08284540474414825, + "step": 95 + }, + { + "epoch": 0.8602150537634409, + "grad_norm": 1.4592756032943726, + "learning_rate": 2.4000000000000003e-06, + "logits/chosen": -5.402789115905762, + "logits/rejected": -5.280846118927002, + "logps/chosen": -304.55316162109375, + "logps/rejected": -314.95306396484375, + "loss": 0.6404934883117676, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.29103565216064453, + "rewards/margins": 0.12328717857599258, + "rewards/rejected": 0.16774848103523254, + "step": 100 + }, + { + "epoch": 0.9032258064516129, + "grad_norm": 7.651180744171143, + "learning_rate": 2.3523809523809523e-06, + "logits/chosen": -5.333284854888916, + "logits/rejected": -5.209356307983398, + "logps/chosen": -385.76483154296875, + "logps/rejected": -317.0154724121094, + "loss": 0.5562876224517822, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 0.5905097126960754, + "rewards/margins": 0.39645156264305115, + "rewards/rejected": 0.19405809044837952, + "step": 105 + }, + { + "epoch": 0.946236559139785, + "grad_norm": 2.2162137031555176, + "learning_rate": 2.304761904761905e-06, + "logits/chosen": -5.9164533615112305, + "logits/rejected": -5.660351276397705, + "logps/chosen": -362.57720947265625, + "logps/rejected": -328.386962890625, + "loss": 0.5672832489013672, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 0.4752024710178375, + "rewards/margins": 0.3182791471481323, + "rewards/rejected": 0.15692326426506042, + "step": 110 + }, + { + "epoch": 0.989247311827957, + "grad_norm": 2.102505683898926, + "learning_rate": 2.257142857142857e-06, + "logits/chosen": -5.367539882659912, + "logits/rejected": -5.304169654846191, + "logps/chosen": -389.9139404296875, + "logps/rejected": -303.767822265625, + "loss": 0.563088321685791, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 0.9107279777526855, + "rewards/margins": 0.7586480379104614, + "rewards/rejected": 0.15207989513874054, + "step": 115 + }, + { + "epoch": 1.0258064516129033, + "grad_norm": 2.9593698978424072, + "learning_rate": 2.20952380952381e-06, + "logits/chosen": -5.506618976593018, + "logits/rejected": -5.349832057952881, + "logps/chosen": -299.4593505859375, + "logps/rejected": -229.88784790039062, + "loss": 0.5665022850036621, + "rewards/accuracies": 0.8823529481887817, + "rewards/chosen": 0.4371771514415741, + "rewards/margins": 0.3876599073410034, + "rewards/rejected": 0.04951724037528038, + "step": 120 + }, + { + "epoch": 1.0688172043010753, + "grad_norm": 2.5988082885742188, + "learning_rate": 2.161904761904762e-06, + "logits/chosen": -5.542575836181641, + "logits/rejected": -5.428658485412598, + "logps/chosen": -353.8345642089844, + "logps/rejected": -312.9586486816406, + "loss": 0.5137276172637939, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 0.6325557827949524, + "rewards/margins": 0.44065460562705994, + "rewards/rejected": 0.19190113246440887, + "step": 125 + }, + { + "epoch": 1.1118279569892473, + "grad_norm": 1.6948280334472656, + "learning_rate": 2.1142857142857147e-06, + "logits/chosen": -5.847611427307129, + "logits/rejected": -5.498036861419678, + "logps/chosen": -300.35577392578125, + "logps/rejected": -311.05126953125, + "loss": 0.5773736953735351, + "rewards/accuracies": 0.875, + "rewards/chosen": 0.6476074457168579, + "rewards/margins": 0.37814000248908997, + "rewards/rejected": 0.2694675028324127, + "step": 130 + }, + { + "epoch": 1.1548387096774193, + "grad_norm": 1.372768759727478, + "learning_rate": 2.0666666666666666e-06, + "logits/chosen": -5.619741916656494, + "logits/rejected": -5.618372917175293, + "logps/chosen": -367.06732177734375, + "logps/rejected": -301.2478332519531, + "loss": 0.46464052200317385, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 0.8353813290596008, + "rewards/margins": 0.6073431968688965, + "rewards/rejected": 0.22803807258605957, + "step": 135 + }, + { + "epoch": 1.1978494623655913, + "grad_norm": 2.498854637145996, + "learning_rate": 2.0190476190476195e-06, + "logits/chosen": -5.195496559143066, + "logits/rejected": -4.919422149658203, + "logps/chosen": -333.09979248046875, + "logps/rejected": -317.78167724609375, + "loss": 0.5423533916473389, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 0.46827277541160583, + "rewards/margins": 0.42974653840065, + "rewards/rejected": 0.0385262668132782, + "step": 140 + }, + { + "epoch": 1.2408602150537635, + "grad_norm": 1.451978087425232, + "learning_rate": 1.9714285714285714e-06, + "logits/chosen": -5.425684452056885, + "logits/rejected": -5.322096824645996, + "logps/chosen": -314.4623107910156, + "logps/rejected": -271.85479736328125, + "loss": 0.4632129192352295, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 0.9266977310180664, + "rewards/margins": 0.672536313533783, + "rewards/rejected": 0.25416144728660583, + "step": 145 + }, + { + "epoch": 1.2838709677419355, + "grad_norm": 2.0581016540527344, + "learning_rate": 1.923809523809524e-06, + "logits/chosen": -4.949021339416504, + "logits/rejected": -5.033829212188721, + "logps/chosen": -356.3919982910156, + "logps/rejected": -330.82720947265625, + "loss": 0.4621857166290283, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 0.7969595789909363, + "rewards/margins": 0.6875919103622437, + "rewards/rejected": 0.10936765372753143, + "step": 150 + }, + { + "epoch": 1.3268817204301075, + "grad_norm": 1.6490590572357178, + "learning_rate": 1.8761904761904762e-06, + "logits/chosen": -5.267385005950928, + "logits/rejected": -5.265533924102783, + "logps/chosen": -297.99224853515625, + "logps/rejected": -326.91339111328125, + "loss": 0.49877166748046875, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 0.7068697810173035, + "rewards/margins": 0.6336223483085632, + "rewards/rejected": 0.07324743270874023, + "step": 155 + }, + { + "epoch": 1.3698924731182796, + "grad_norm": 1.483849048614502, + "learning_rate": 1.8285714285714288e-06, + "logits/chosen": -5.356790065765381, + "logits/rejected": -5.103802680969238, + "logps/chosen": -294.2845153808594, + "logps/rejected": -274.7852783203125, + "loss": 0.49851350784301757, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 0.6102481484413147, + "rewards/margins": 0.5510420203208923, + "rewards/rejected": 0.05920610576868057, + "step": 160 + }, + { + "epoch": 1.4129032258064516, + "grad_norm": 1.3165900707244873, + "learning_rate": 1.780952380952381e-06, + "logits/chosen": -5.431517124176025, + "logits/rejected": -5.376145362854004, + "logps/chosen": -325.1388244628906, + "logps/rejected": -327.6669616699219, + "loss": 0.4309373378753662, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 0.6497762799263, + "rewards/margins": 0.9170367121696472, + "rewards/rejected": -0.2672604024410248, + "step": 165 + }, + { + "epoch": 1.4559139784946238, + "grad_norm": 1.5377726554870605, + "learning_rate": 1.7333333333333332e-06, + "logits/chosen": -5.27555513381958, + "logits/rejected": -5.022242069244385, + "logps/chosen": -340.17779541015625, + "logps/rejected": -288.0126647949219, + "loss": 0.4575087547302246, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 0.5721868276596069, + "rewards/margins": 0.6390407085418701, + "rewards/rejected": -0.06685388088226318, + "step": 170 + }, + { + "epoch": 1.4989247311827958, + "grad_norm": 2.0849504470825195, + "learning_rate": 1.6857142857142858e-06, + "logits/chosen": -5.070017337799072, + "logits/rejected": -5.327781677246094, + "logps/chosen": -360.60809326171875, + "logps/rejected": -292.8214416503906, + "loss": 0.45406789779663087, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 0.748975396156311, + "rewards/margins": 0.6336467266082764, + "rewards/rejected": 0.11532865464687347, + "step": 175 + }, + { + "epoch": 1.5419354838709678, + "grad_norm": 1.4946179389953613, + "learning_rate": 1.638095238095238e-06, + "logits/chosen": -5.378829002380371, + "logits/rejected": -5.2273030281066895, + "logps/chosen": -369.40679931640625, + "logps/rejected": -310.7534484863281, + "loss": 0.40073528289794924, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9242110252380371, + "rewards/margins": 0.8946256637573242, + "rewards/rejected": 0.029585417360067368, + "step": 180 + }, + { + "epoch": 1.5849462365591398, + "grad_norm": 1.9285597801208496, + "learning_rate": 1.5904761904761906e-06, + "logits/chosen": -5.534226417541504, + "logits/rejected": -5.36181640625, + "logps/chosen": -285.0791931152344, + "logps/rejected": -258.01654052734375, + "loss": 0.44419097900390625, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 0.7649332284927368, + "rewards/margins": 0.6902278661727905, + "rewards/rejected": 0.07470535486936569, + "step": 185 + }, + { + "epoch": 1.6279569892473118, + "grad_norm": 1.4278947114944458, + "learning_rate": 1.5428571428571428e-06, + "logits/chosen": -5.1719889640808105, + "logits/rejected": -5.186745643615723, + "logps/chosen": -366.37286376953125, + "logps/rejected": -310.18048095703125, + "loss": 0.37736806869506834, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4671168327331543, + "rewards/margins": 1.110413908958435, + "rewards/rejected": 0.3567030429840088, + "step": 190 + }, + { + "epoch": 1.6709677419354838, + "grad_norm": 1.7930585145950317, + "learning_rate": 1.4952380952380954e-06, + "logits/chosen": -5.458104133605957, + "logits/rejected": -5.514155864715576, + "logps/chosen": -288.04083251953125, + "logps/rejected": -311.49090576171875, + "loss": 0.43456592559814455, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 0.7255552411079407, + "rewards/margins": 0.843337893486023, + "rewards/rejected": -0.11778266727924347, + "step": 195 + }, + { + "epoch": 1.7139784946236558, + "grad_norm": 1.3665006160736084, + "learning_rate": 1.4476190476190478e-06, + "logits/chosen": -5.221610069274902, + "logits/rejected": -5.051304340362549, + "logps/chosen": -379.436279296875, + "logps/rejected": -294.8427734375, + "loss": 0.3780463218688965, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 1.0199247598648071, + "rewards/margins": 1.0175530910491943, + "rewards/rejected": 0.0023716867435723543, + "step": 200 + }, + { + "epoch": 1.7569892473118278, + "grad_norm": 1.2653045654296875, + "learning_rate": 1.4000000000000001e-06, + "logits/chosen": -5.128909111022949, + "logits/rejected": -4.9968366622924805, + "logps/chosen": -374.5747375488281, + "logps/rejected": -310.3750915527344, + "loss": 0.42014646530151367, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 1.18593430519104, + "rewards/margins": 1.1055948734283447, + "rewards/rejected": 0.08033928275108337, + "step": 205 + }, + { + "epoch": 1.8, + "grad_norm": 1.0835349559783936, + "learning_rate": 1.3523809523809525e-06, + "logits/chosen": -5.398374557495117, + "logits/rejected": -5.446703910827637, + "logps/chosen": -332.9021911621094, + "logps/rejected": -307.1310119628906, + "loss": 0.4112071990966797, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 1.0410544872283936, + "rewards/margins": 1.0919562578201294, + "rewards/rejected": -0.05090172961354256, + "step": 210 + }, + { + "epoch": 1.843010752688172, + "grad_norm": 1.8526785373687744, + "learning_rate": 1.3047619047619047e-06, + "logits/chosen": -5.290182113647461, + "logits/rejected": -5.226934909820557, + "logps/chosen": -376.863525390625, + "logps/rejected": -320.2511291503906, + "loss": 0.4023551940917969, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 1.0466766357421875, + "rewards/margins": 0.915771484375, + "rewards/rejected": 0.1309051215648651, + "step": 215 + }, + { + "epoch": 1.886021505376344, + "grad_norm": 1.6254379749298096, + "learning_rate": 1.2571428571428571e-06, + "logits/chosen": -5.433383464813232, + "logits/rejected": -5.424604892730713, + "logps/chosen": -276.48382568359375, + "logps/rejected": -254.852294921875, + "loss": 0.42627677917480467, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 1.1158783435821533, + "rewards/margins": 0.8330133557319641, + "rewards/rejected": 0.2828650176525116, + "step": 220 + }, + { + "epoch": 1.9290322580645163, + "grad_norm": 3.121758460998535, + "learning_rate": 1.2095238095238095e-06, + "logits/chosen": -5.485299110412598, + "logits/rejected": -5.462108612060547, + "logps/chosen": -276.477294921875, + "logps/rejected": -285.85516357421875, + "loss": 0.4259671688079834, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 0.8818387985229492, + "rewards/margins": 0.7649229764938354, + "rewards/rejected": 0.11691585928201675, + "step": 225 + }, + { + "epoch": 1.9720430107526883, + "grad_norm": 2.3801393508911133, + "learning_rate": 1.161904761904762e-06, + "logits/chosen": -5.211794376373291, + "logits/rejected": -4.747314929962158, + "logps/chosen": -416.1683654785156, + "logps/rejected": -365.8246765136719, + "loss": 0.3910404443740845, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 1.5805695056915283, + "rewards/margins": 1.2381842136383057, + "rewards/rejected": 0.34238511323928833, + "step": 230 + }, + { + "epoch": 2.0086021505376346, + "grad_norm": 1.0709431171417236, + "learning_rate": 1.1142857142857143e-06, + "logits/chosen": -5.255922794342041, + "logits/rejected": -5.191451549530029, + "logps/chosen": -329.1349182128906, + "logps/rejected": -281.3226623535156, + "loss": 0.4125385761260986, + "rewards/accuracies": 0.970588207244873, + "rewards/chosen": 1.2619037628173828, + "rewards/margins": 1.028172254562378, + "rewards/rejected": 0.2337314933538437, + "step": 235 + }, + { + "epoch": 2.0516129032258066, + "grad_norm": 1.131330966949463, + "learning_rate": 1.0666666666666667e-06, + "logits/chosen": -5.735346794128418, + "logits/rejected": -5.384338855743408, + "logps/chosen": -354.9601135253906, + "logps/rejected": -285.20147705078125, + "loss": 0.3143571138381958, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6434673070907593, + "rewards/margins": 1.7338101863861084, + "rewards/rejected": -0.09034281969070435, + "step": 240 + }, + { + "epoch": 2.0946236559139786, + "grad_norm": 1.3222469091415405, + "learning_rate": 1.019047619047619e-06, + "logits/chosen": -5.4371256828308105, + "logits/rejected": -5.41799259185791, + "logps/chosen": -318.0994567871094, + "logps/rejected": -296.23126220703125, + "loss": 0.3331931114196777, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9413619041442871, + "rewards/margins": 1.1318610906600952, + "rewards/rejected": -0.1904991865158081, + "step": 245 + }, + { + "epoch": 2.1376344086021506, + "grad_norm": 1.1309553384780884, + "learning_rate": 9.714285714285715e-07, + "logits/chosen": -5.32918643951416, + "logits/rejected": -5.228451251983643, + "logps/chosen": -386.4266662597656, + "logps/rejected": -299.82586669921875, + "loss": 0.30677978992462157, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 1.5374553203582764, + "rewards/margins": 1.2936350107192993, + "rewards/rejected": 0.24382023513317108, + "step": 250 + }, + { + "epoch": 2.1806451612903226, + "grad_norm": 1.2125121355056763, + "learning_rate": 9.238095238095239e-07, + "logits/chosen": -5.294185638427734, + "logits/rejected": -5.28645658493042, + "logps/chosen": -297.06683349609375, + "logps/rejected": -259.61456298828125, + "loss": 0.35644917488098143, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 1.0588313341140747, + "rewards/margins": 0.9543617367744446, + "rewards/rejected": 0.10446955263614655, + "step": 255 + }, + { + "epoch": 2.2236559139784946, + "grad_norm": 1.6852556467056274, + "learning_rate": 8.761904761904763e-07, + "logits/chosen": -5.6651225090026855, + "logits/rejected": -5.341966152191162, + "logps/chosen": -343.6695556640625, + "logps/rejected": -288.97869873046875, + "loss": 0.3159534454345703, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 1.1598581075668335, + "rewards/margins": 1.1884613037109375, + "rewards/rejected": -0.02860334888100624, + "step": 260 + }, + { + "epoch": 2.2666666666666666, + "grad_norm": 1.3867968320846558, + "learning_rate": 8.285714285714287e-07, + "logits/chosen": -5.487370491027832, + "logits/rejected": -5.273520469665527, + "logps/chosen": -370.5751037597656, + "logps/rejected": -309.1852722167969, + "loss": 0.2891366720199585, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5521111488342285, + "rewards/margins": 1.3738138675689697, + "rewards/rejected": 0.17829741537570953, + "step": 265 + }, + { + "epoch": 2.3096774193548386, + "grad_norm": 1.3379493951797485, + "learning_rate": 7.80952380952381e-07, + "logits/chosen": -5.195990085601807, + "logits/rejected": -5.239910125732422, + "logps/chosen": -313.62939453125, + "logps/rejected": -252.7853546142578, + "loss": 0.32800557613372805, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3296881914138794, + "rewards/margins": 1.1511423587799072, + "rewards/rejected": 0.1785457581281662, + "step": 270 + }, + { + "epoch": 2.3526881720430106, + "grad_norm": 1.2220584154129028, + "learning_rate": 7.333333333333333e-07, + "logits/chosen": -5.423740386962891, + "logits/rejected": -5.007106781005859, + "logps/chosen": -338.8307189941406, + "logps/rejected": -317.20355224609375, + "loss": 0.2776246786117554, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2019110918045044, + "rewards/margins": 1.4179589748382568, + "rewards/rejected": -0.21604792773723602, + "step": 275 + }, + { + "epoch": 2.3956989247311826, + "grad_norm": 1.2976250648498535, + "learning_rate": 6.857142857142857e-07, + "logits/chosen": -5.235350608825684, + "logits/rejected": -5.151061534881592, + "logps/chosen": -311.9940185546875, + "logps/rejected": -273.13653564453125, + "loss": 0.29607300758361815, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1353685855865479, + "rewards/margins": 1.4848861694335938, + "rewards/rejected": -0.34951773285865784, + "step": 280 + }, + { + "epoch": 2.4387096774193546, + "grad_norm": 1.5546081066131592, + "learning_rate": 6.380952380952381e-07, + "logits/chosen": -5.2240986824035645, + "logits/rejected": -5.1968793869018555, + "logps/chosen": -343.4505920410156, + "logps/rejected": -266.83563232421875, + "loss": 0.3283820152282715, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.146566390991211, + "rewards/margins": 1.1784019470214844, + "rewards/rejected": -0.03183561936020851, + "step": 285 + }, + { + "epoch": 2.481720430107527, + "grad_norm": 1.9226080179214478, + "learning_rate": 5.904761904761905e-07, + "logits/chosen": -5.224617958068848, + "logits/rejected": -5.07640266418457, + "logps/chosen": -368.66729736328125, + "logps/rejected": -335.21917724609375, + "loss": 0.31503658294677733, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 1.1312525272369385, + "rewards/margins": 1.1962707042694092, + "rewards/rejected": -0.0650181695818901, + "step": 290 + }, + { + "epoch": 2.524731182795699, + "grad_norm": 1.0341100692749023, + "learning_rate": 5.428571428571429e-07, + "logits/chosen": -5.5554656982421875, + "logits/rejected": -5.393430709838867, + "logps/chosen": -295.96405029296875, + "logps/rejected": -290.5457458496094, + "loss": 0.2795746326446533, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 1.1615794897079468, + "rewards/margins": 1.2998874187469482, + "rewards/rejected": -0.1383078545331955, + "step": 295 + }, + { + "epoch": 2.567741935483871, + "grad_norm": 1.2715569734573364, + "learning_rate": 4.952380952380952e-07, + "logits/chosen": -5.555523872375488, + "logits/rejected": -5.402945518493652, + "logps/chosen": -335.7245788574219, + "logps/rejected": -330.8264465332031, + "loss": 0.2791964292526245, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0191905498504639, + "rewards/margins": 1.5995515584945679, + "rewards/rejected": -0.580361008644104, + "step": 300 + }, + { + "epoch": 2.610752688172043, + "grad_norm": 1.1054896116256714, + "learning_rate": 4.4761904761904764e-07, + "logits/chosen": -5.346233367919922, + "logits/rejected": -5.2186713218688965, + "logps/chosen": -344.9323425292969, + "logps/rejected": -347.8134765625, + "loss": 0.2980963945388794, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1464207172393799, + "rewards/margins": 1.3599145412445068, + "rewards/rejected": -0.21349389851093292, + "step": 305 + }, + { + "epoch": 2.653763440860215, + "grad_norm": 0.9144394397735596, + "learning_rate": 4e-07, + "logits/chosen": -5.2957868576049805, + "logits/rejected": -5.262829780578613, + "logps/chosen": -359.19464111328125, + "logps/rejected": -328.304443359375, + "loss": 0.25096189975738525, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.198318362236023, + "rewards/margins": 1.7069896459579468, + "rewards/rejected": -0.5086712837219238, + "step": 310 + }, + { + "epoch": 2.696774193548387, + "grad_norm": 0.8603355884552002, + "learning_rate": 3.523809523809524e-07, + "logits/chosen": -5.491151809692383, + "logits/rejected": -5.42437219619751, + "logps/chosen": -367.32843017578125, + "logps/rejected": -313.4622497558594, + "loss": 0.2945852279663086, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 1.4304109811782837, + "rewards/margins": 1.554801106452942, + "rewards/rejected": -0.12439011037349701, + "step": 315 + }, + { + "epoch": 2.739784946236559, + "grad_norm": 1.1267086267471313, + "learning_rate": 3.0476190476190477e-07, + "logits/chosen": -5.442085266113281, + "logits/rejected": -5.499457359313965, + "logps/chosen": -298.1466979980469, + "logps/rejected": -318.5505676269531, + "loss": 0.36453125476837156, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 0.9415658712387085, + "rewards/margins": 1.0582208633422852, + "rewards/rejected": -0.11665502935647964, + "step": 320 + }, + { + "epoch": 2.782795698924731, + "grad_norm": 1.8089045286178589, + "learning_rate": 2.5714285714285716e-07, + "logits/chosen": -5.749306678771973, + "logits/rejected": -5.6225762367248535, + "logps/chosen": -254.8236541748047, + "logps/rejected": -271.09698486328125, + "loss": 0.2935594081878662, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.8951157331466675, + "rewards/margins": 1.194537878036499, + "rewards/rejected": -0.2994222044944763, + "step": 325 + }, + { + "epoch": 2.825806451612903, + "grad_norm": 1.1241213083267212, + "learning_rate": 2.0952380952380953e-07, + "logits/chosen": -5.630118370056152, + "logits/rejected": -5.584993362426758, + "logps/chosen": -267.04742431640625, + "logps/rejected": -239.6341094970703, + "loss": 0.30673086643218994, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.19244384765625, + "rewards/margins": 1.149562120437622, + "rewards/rejected": 0.04288160055875778, + "step": 330 + }, + { + "epoch": 2.868817204301075, + "grad_norm": 1.8405497074127197, + "learning_rate": 1.6190476190476192e-07, + "logits/chosen": -5.395017623901367, + "logits/rejected": -5.194415092468262, + "logps/chosen": -344.854736328125, + "logps/rejected": -338.8992919921875, + "loss": 0.2845343351364136, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1038144826889038, + "rewards/margins": 1.5797145366668701, + "rewards/rejected": -0.4759000241756439, + "step": 335 + }, + { + "epoch": 2.9118279569892476, + "grad_norm": 1.088416576385498, + "learning_rate": 1.142857142857143e-07, + "logits/chosen": -5.30595588684082, + "logits/rejected": -5.5908002853393555, + "logps/chosen": -325.8397521972656, + "logps/rejected": -315.6219177246094, + "loss": 0.2914729595184326, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 1.364817500114441, + "rewards/margins": 1.4060461521148682, + "rewards/rejected": -0.04122857004404068, + "step": 340 + }, + { + "epoch": 2.9548387096774196, + "grad_norm": 1.6547528505325317, + "learning_rate": 6.666666666666667e-08, + "logits/chosen": -5.339937686920166, + "logits/rejected": -5.211861610412598, + "logps/chosen": -356.51739501953125, + "logps/rejected": -403.10015869140625, + "loss": 0.36704728603363035, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 1.14650297164917, + "rewards/margins": 1.1948604583740234, + "rewards/rejected": -0.04835757985711098, + "step": 345 + }, + { + "epoch": 2.9978494623655916, + "grad_norm": 1.088441014289856, + "learning_rate": 1.9047619047619048e-08, + "logits/chosen": -5.6740312576293945, + "logits/rejected": -5.567724704742432, + "logps/chosen": -295.913330078125, + "logps/rejected": -332.6319274902344, + "loss": 0.27830004692077637, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9771059155464172, + "rewards/margins": 1.485169768333435, + "rewards/rejected": -0.5080639123916626, + "step": 350 + } + ], + "logging_steps": 5, + "max_steps": 351, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 50, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-9b-dpo/checkpoint-350/training_args.bin b/gemma-9b-dpo/checkpoint-350/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3dd4e898c665a87974fee402a2f65954466af4f7 --- /dev/null +++ b/gemma-9b-dpo/checkpoint-350/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74ee585106626f1196ffc9833586c5c11146fc384e5b33ed522cf45bab148032 +size 6097 diff --git a/gemma-9b-dpo/checkpoint-351/README.md b/gemma-9b-dpo/checkpoint-351/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a367db0a4fe66cceaf15eef8697f5b4480f48fd3 --- /dev/null +++ b/gemma-9b-dpo/checkpoint-351/README.md @@ -0,0 +1,209 @@ +--- +base_model: google/gemma-2-9b-it +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:google/gemma-2-9b-it +- dpo +- lora +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/gemma-9b-dpo/checkpoint-351/adapter_config.json b/gemma-9b-dpo/checkpoint-351/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c80c202b52eb25538c92a8e095abe785cdc6f749 --- /dev/null +++ b/gemma-9b-dpo/checkpoint-351/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2-9b-it", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj", + "v_proj", + "k_proj", + "up_proj", + "q_proj", + "gate_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-9b-dpo/checkpoint-351/adapter_model.safetensors b/gemma-9b-dpo/checkpoint-351/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3ff7314384e504e61384037c1a6cdad1b96b2e49 --- /dev/null +++ b/gemma-9b-dpo/checkpoint-351/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ea0ff88e00dfb29580d0fa61936b45ac7ee6e5f886f57fd984f2f3854211d5e +size 216151256 diff --git a/gemma-9b-dpo/checkpoint-351/chat_template.jinja b/gemma-9b-dpo/checkpoint-351/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..923ec253c8dbefbb41cf084db7251df41d000f6d --- /dev/null +++ b/gemma-9b-dpo/checkpoint-351/chat_template.jinja @@ -0,0 +1,4 @@ +{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + ' +' + message['content'] | trim + ' +' }}{% endfor %}{% if add_generation_prompt %}{{'model +'}}{% endif %} \ No newline at end of file diff --git a/gemma-9b-dpo/checkpoint-351/optimizer.pt b/gemma-9b-dpo/checkpoint-351/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..b69500169639b258c53b955b359f64fbc0b86687 --- /dev/null +++ b/gemma-9b-dpo/checkpoint-351/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23123e7c96e08f3d5910ec5e3b38bd8edc76216085e0cc9fd567378021b8b76a +size 110426453 diff --git a/gemma-9b-dpo/checkpoint-351/rng_state.pth b/gemma-9b-dpo/checkpoint-351/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..065f385e762194a148dec7fed295c58a3e7c17fa --- /dev/null +++ b/gemma-9b-dpo/checkpoint-351/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2534e434cd5abbb8f7668d3eab0549db0ef95d6a797a3efa86b712e8e32266a7 +size 14645 diff --git a/gemma-9b-dpo/checkpoint-351/scheduler.pt b/gemma-9b-dpo/checkpoint-351/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..84d085b4c2f7d07cb01b9ae35bebfa00d679a2ff --- /dev/null +++ b/gemma-9b-dpo/checkpoint-351/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aff2ffb570ea5b4f50869c74f63d0fd40657fb784e59d6d7b029d945ec7612f7 +size 1465 diff --git a/gemma-9b-dpo/checkpoint-351/tokenizer.json b/gemma-9b-dpo/checkpoint-351/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6523cc0616b64c563af913a417dfa7eb01549a2c --- /dev/null +++ b/gemma-9b-dpo/checkpoint-351/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:394ace002a144ac6ad5486387502f2d36f70c087310c3d907857240c76fcb36e +size 34362748 diff --git a/gemma-9b-dpo/checkpoint-351/tokenizer_config.json b/gemma-9b-dpo/checkpoint-351/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..eabc4ed8ab34751069a1970d61e616ce93e53880 --- /dev/null +++ b/gemma-9b-dpo/checkpoint-351/tokenizer_config.json @@ -0,0 +1,19 @@ +{ + "backend": "tokenizers", + "bos_token": "", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "extra_special_tokens": [ + "", + "" + ], + "is_local": false, + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-9b-dpo/checkpoint-351/trainer_state.json b/gemma-9b-dpo/checkpoint-351/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c366002805e1687241610013cf5ddfafb311c9fb --- /dev/null +++ b/gemma-9b-dpo/checkpoint-351/trainer_state.json @@ -0,0 +1,1084 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 351, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.043010752688172046, + "grad_norm": 1.6562875509262085, + "learning_rate": 3.333333333333333e-07, + "logits/chosen": -5.94815731048584, + "logits/rejected": -5.856410503387451, + "logps/chosen": -302.07794189453125, + "logps/rejected": -283.5802001953125, + "loss": 0.6852486610412598, + "rewards/accuracies": 0.375, + "rewards/chosen": 0.01349079143255949, + "rewards/margins": 0.017757166177034378, + "rewards/rejected": -0.00426637614145875, + "step": 5 + }, + { + "epoch": 0.08602150537634409, + "grad_norm": 1.7723957300186157, + "learning_rate": 7.5e-07, + "logits/chosen": -5.585428714752197, + "logits/rejected": -5.577895641326904, + "logps/chosen": -362.89239501953125, + "logps/rejected": -280.8010559082031, + "loss": 0.6844330310821534, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.018745478242635727, + "rewards/margins": 0.018739622086286545, + "rewards/rejected": 5.85438692723983e-06, + "step": 10 + }, + { + "epoch": 0.12903225806451613, + "grad_norm": 1.6630758047103882, + "learning_rate": 1.1666666666666668e-06, + "logits/chosen": -5.916059494018555, + "logits/rejected": -5.797668933868408, + "logps/chosen": -307.4602966308594, + "logps/rejected": -316.79803466796875, + "loss": 0.7070020198822021, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.008706416003406048, + "rewards/margins": -0.02138950116932392, + "rewards/rejected": 0.012683087959885597, + "step": 15 + }, + { + "epoch": 0.17204301075268819, + "grad_norm": 5.225494861602783, + "learning_rate": 1.5833333333333333e-06, + "logits/chosen": -5.818185329437256, + "logits/rejected": -5.638014793395996, + "logps/chosen": -329.3303527832031, + "logps/rejected": -292.2645568847656, + "loss": 0.6929943561553955, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": -0.044999174773693085, + "rewards/margins": 0.00216915225610137, + "rewards/rejected": -0.04716832935810089, + "step": 20 + }, + { + "epoch": 0.21505376344086022, + "grad_norm": 2.5229244232177734, + "learning_rate": 2e-06, + "logits/chosen": -5.8189592361450195, + "logits/rejected": -5.646462917327881, + "logps/chosen": -311.9922180175781, + "logps/rejected": -297.0098571777344, + "loss": 0.7002316951751709, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": 0.0544702522456646, + "rewards/margins": -0.012132339179515839, + "rewards/rejected": 0.06660258769989014, + "step": 25 + }, + { + "epoch": 0.25806451612903225, + "grad_norm": 1.6524324417114258, + "learning_rate": 2.4166666666666667e-06, + "logits/chosen": -5.696784496307373, + "logits/rejected": -5.6943678855896, + "logps/chosen": -350.2938537597656, + "logps/rejected": -322.43182373046875, + "loss": 0.699475908279419, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.0020792910363525152, + "rewards/margins": 0.02348965033888817, + "rewards/rejected": -0.025568943470716476, + "step": 30 + }, + { + "epoch": 0.3010752688172043, + "grad_norm": 9.075493812561035, + "learning_rate": 2.8333333333333335e-06, + "logits/chosen": -5.407717704772949, + "logits/rejected": -5.5642805099487305, + "logps/chosen": -391.3837585449219, + "logps/rejected": -333.9232482910156, + "loss": 0.6926415920257568, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": -0.026233959943056107, + "rewards/margins": 0.00582819152623415, + "rewards/rejected": -0.03206215053796768, + "step": 35 + }, + { + "epoch": 0.34408602150537637, + "grad_norm": 2.6282100677490234, + "learning_rate": 2.9714285714285716e-06, + "logits/chosen": -5.649778366088867, + "logits/rejected": -5.512002944946289, + "logps/chosen": -338.31048583984375, + "logps/rejected": -279.06536865234375, + "loss": 0.6739111423492432, + "rewards/accuracies": 0.625, + "rewards/chosen": 0.04578382521867752, + "rewards/margins": 0.042618148028850555, + "rewards/rejected": 0.0031656839419156313, + "step": 40 + }, + { + "epoch": 0.3870967741935484, + "grad_norm": 16.96689796447754, + "learning_rate": 2.923809523809524e-06, + "logits/chosen": -5.518254280090332, + "logits/rejected": -5.577255725860596, + "logps/chosen": -374.29449462890625, + "logps/rejected": -366.4861145019531, + "loss": 0.7162120819091797, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": 0.03416462987661362, + "rewards/margins": 0.0036566159687936306, + "rewards/rejected": 0.030508000403642654, + "step": 45 + }, + { + "epoch": 0.43010752688172044, + "grad_norm": 1.447816252708435, + "learning_rate": 2.8761904761904764e-06, + "logits/chosen": -5.831571102142334, + "logits/rejected": -5.802765846252441, + "logps/chosen": -316.0965881347656, + "logps/rejected": -269.0494384765625, + "loss": 0.6838803291320801, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.1265859305858612, + "rewards/margins": 0.026753634214401245, + "rewards/rejected": -0.15333956480026245, + "step": 50 + }, + { + "epoch": 0.4731182795698925, + "grad_norm": 2.174459457397461, + "learning_rate": 2.8285714285714288e-06, + "logits/chosen": -5.777364253997803, + "logits/rejected": -5.834509372711182, + "logps/chosen": -316.31390380859375, + "logps/rejected": -282.5376892089844, + "loss": 0.6832016944885254, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.01756533980369568, + "rewards/margins": 0.02713041380047798, + "rewards/rejected": -0.04469575732946396, + "step": 55 + }, + { + "epoch": 0.5161290322580645, + "grad_norm": 2.2876009941101074, + "learning_rate": 2.780952380952381e-06, + "logits/chosen": -5.92340087890625, + "logits/rejected": -5.846226692199707, + "logps/chosen": -235.49667358398438, + "logps/rejected": -267.7764892578125, + "loss": 0.6656109809875488, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.021378064528107643, + "rewards/margins": 0.06001085042953491, + "rewards/rejected": -0.038632798939943314, + "step": 60 + }, + { + "epoch": 0.5591397849462365, + "grad_norm": 3.45564603805542, + "learning_rate": 2.733333333333333e-06, + "logits/chosen": -5.650258541107178, + "logits/rejected": -5.6721577644348145, + "logps/chosen": -381.40655517578125, + "logps/rejected": -311.68865966796875, + "loss": 0.7090614795684814, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": 0.1167660504579544, + "rewards/margins": -0.0002812861348502338, + "rewards/rejected": 0.11704733222723007, + "step": 65 + }, + { + "epoch": 0.6021505376344086, + "grad_norm": 4.502689838409424, + "learning_rate": 2.685714285714286e-06, + "logits/chosen": -5.727735996246338, + "logits/rejected": -5.644078254699707, + "logps/chosen": -362.89886474609375, + "logps/rejected": -314.7126770019531, + "loss": 0.6596640110015869, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.07285688072443008, + "rewards/margins": 0.07270809262990952, + "rewards/rejected": 0.00014879256195854396, + "step": 70 + }, + { + "epoch": 0.6451612903225806, + "grad_norm": 2.2681005001068115, + "learning_rate": 2.638095238095238e-06, + "logits/chosen": -5.528594017028809, + "logits/rejected": -5.29849910736084, + "logps/chosen": -353.0726013183594, + "logps/rejected": -331.8568115234375, + "loss": 0.7387234687805175, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": 0.1685911864042282, + "rewards/margins": -0.04706493765115738, + "rewards/rejected": 0.2156561315059662, + "step": 75 + }, + { + "epoch": 0.6881720430107527, + "grad_norm": 3.0135886669158936, + "learning_rate": 2.5904761904761907e-06, + "logits/chosen": -5.8677144050598145, + "logits/rejected": -5.5384016036987305, + "logps/chosen": -326.17724609375, + "logps/rejected": -285.8974304199219, + "loss": 0.6047093391418457, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 0.19864055514335632, + "rewards/margins": 0.19678126275539398, + "rewards/rejected": 0.0018593042623251677, + "step": 80 + }, + { + "epoch": 0.7311827956989247, + "grad_norm": 1.7684988975524902, + "learning_rate": 2.5428571428571427e-06, + "logits/chosen": -5.413943290710449, + "logits/rejected": -5.427316665649414, + "logps/chosen": -372.1170654296875, + "logps/rejected": -312.30279541015625, + "loss": 0.651799201965332, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": 0.179380863904953, + "rewards/margins": 0.09846550226211548, + "rewards/rejected": 0.08091535419225693, + "step": 85 + }, + { + "epoch": 0.7741935483870968, + "grad_norm": 2.4072341918945312, + "learning_rate": 2.4952380952380955e-06, + "logits/chosen": -5.4685492515563965, + "logits/rejected": -5.51275634765625, + "logps/chosen": -345.37188720703125, + "logps/rejected": -273.87261962890625, + "loss": 0.6362700462341309, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.33122482895851135, + "rewards/margins": 0.1338074505329132, + "rewards/rejected": 0.19741736352443695, + "step": 90 + }, + { + "epoch": 0.8172043010752689, + "grad_norm": 1.745698094367981, + "learning_rate": 2.4476190476190475e-06, + "logits/chosen": -5.6481757164001465, + "logits/rejected": -5.4260029792785645, + "logps/chosen": -337.11767578125, + "logps/rejected": -263.3458251953125, + "loss": 0.6222721099853515, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 0.25297099351882935, + "rewards/margins": 0.1701255589723587, + "rewards/rejected": 0.08284540474414825, + "step": 95 + }, + { + "epoch": 0.8602150537634409, + "grad_norm": 1.4592756032943726, + "learning_rate": 2.4000000000000003e-06, + "logits/chosen": -5.402789115905762, + "logits/rejected": -5.280846118927002, + "logps/chosen": -304.55316162109375, + "logps/rejected": -314.95306396484375, + "loss": 0.6404934883117676, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.29103565216064453, + "rewards/margins": 0.12328717857599258, + "rewards/rejected": 0.16774848103523254, + "step": 100 + }, + { + "epoch": 0.9032258064516129, + "grad_norm": 7.651180744171143, + "learning_rate": 2.3523809523809523e-06, + "logits/chosen": -5.333284854888916, + "logits/rejected": -5.209356307983398, + "logps/chosen": -385.76483154296875, + "logps/rejected": -317.0154724121094, + "loss": 0.5562876224517822, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 0.5905097126960754, + "rewards/margins": 0.39645156264305115, + "rewards/rejected": 0.19405809044837952, + "step": 105 + }, + { + "epoch": 0.946236559139785, + "grad_norm": 2.2162137031555176, + "learning_rate": 2.304761904761905e-06, + "logits/chosen": -5.9164533615112305, + "logits/rejected": -5.660351276397705, + "logps/chosen": -362.57720947265625, + "logps/rejected": -328.386962890625, + "loss": 0.5672832489013672, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": 0.4752024710178375, + "rewards/margins": 0.3182791471481323, + "rewards/rejected": 0.15692326426506042, + "step": 110 + }, + { + "epoch": 0.989247311827957, + "grad_norm": 2.102505683898926, + "learning_rate": 2.257142857142857e-06, + "logits/chosen": -5.367539882659912, + "logits/rejected": -5.304169654846191, + "logps/chosen": -389.9139404296875, + "logps/rejected": -303.767822265625, + "loss": 0.563088321685791, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 0.9107279777526855, + "rewards/margins": 0.7586480379104614, + "rewards/rejected": 0.15207989513874054, + "step": 115 + }, + { + "epoch": 1.0258064516129033, + "grad_norm": 2.9593698978424072, + "learning_rate": 2.20952380952381e-06, + "logits/chosen": -5.506618976593018, + "logits/rejected": -5.349832057952881, + "logps/chosen": -299.4593505859375, + "logps/rejected": -229.88784790039062, + "loss": 0.5665022850036621, + "rewards/accuracies": 0.8823529481887817, + "rewards/chosen": 0.4371771514415741, + "rewards/margins": 0.3876599073410034, + "rewards/rejected": 0.04951724037528038, + "step": 120 + }, + { + "epoch": 1.0688172043010753, + "grad_norm": 2.5988082885742188, + "learning_rate": 2.161904761904762e-06, + "logits/chosen": -5.542575836181641, + "logits/rejected": -5.428658485412598, + "logps/chosen": -353.8345642089844, + "logps/rejected": -312.9586486816406, + "loss": 0.5137276172637939, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 0.6325557827949524, + "rewards/margins": 0.44065460562705994, + "rewards/rejected": 0.19190113246440887, + "step": 125 + }, + { + "epoch": 1.1118279569892473, + "grad_norm": 1.6948280334472656, + "learning_rate": 2.1142857142857147e-06, + "logits/chosen": -5.847611427307129, + "logits/rejected": -5.498036861419678, + "logps/chosen": -300.35577392578125, + "logps/rejected": -311.05126953125, + "loss": 0.5773736953735351, + "rewards/accuracies": 0.875, + "rewards/chosen": 0.6476074457168579, + "rewards/margins": 0.37814000248908997, + "rewards/rejected": 0.2694675028324127, + "step": 130 + }, + { + "epoch": 1.1548387096774193, + "grad_norm": 1.372768759727478, + "learning_rate": 2.0666666666666666e-06, + "logits/chosen": -5.619741916656494, + "logits/rejected": -5.618372917175293, + "logps/chosen": -367.06732177734375, + "logps/rejected": -301.2478332519531, + "loss": 0.46464052200317385, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 0.8353813290596008, + "rewards/margins": 0.6073431968688965, + "rewards/rejected": 0.22803807258605957, + "step": 135 + }, + { + "epoch": 1.1978494623655913, + "grad_norm": 2.498854637145996, + "learning_rate": 2.0190476190476195e-06, + "logits/chosen": -5.195496559143066, + "logits/rejected": -4.919422149658203, + "logps/chosen": -333.09979248046875, + "logps/rejected": -317.78167724609375, + "loss": 0.5423533916473389, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 0.46827277541160583, + "rewards/margins": 0.42974653840065, + "rewards/rejected": 0.0385262668132782, + "step": 140 + }, + { + "epoch": 1.2408602150537635, + "grad_norm": 1.451978087425232, + "learning_rate": 1.9714285714285714e-06, + "logits/chosen": -5.425684452056885, + "logits/rejected": -5.322096824645996, + "logps/chosen": -314.4623107910156, + "logps/rejected": -271.85479736328125, + "loss": 0.4632129192352295, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 0.9266977310180664, + "rewards/margins": 0.672536313533783, + "rewards/rejected": 0.25416144728660583, + "step": 145 + }, + { + "epoch": 1.2838709677419355, + "grad_norm": 2.0581016540527344, + "learning_rate": 1.923809523809524e-06, + "logits/chosen": -4.949021339416504, + "logits/rejected": -5.033829212188721, + "logps/chosen": -356.3919982910156, + "logps/rejected": -330.82720947265625, + "loss": 0.4621857166290283, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 0.7969595789909363, + "rewards/margins": 0.6875919103622437, + "rewards/rejected": 0.10936765372753143, + "step": 150 + }, + { + "epoch": 1.3268817204301075, + "grad_norm": 1.6490590572357178, + "learning_rate": 1.8761904761904762e-06, + "logits/chosen": -5.267385005950928, + "logits/rejected": -5.265533924102783, + "logps/chosen": -297.99224853515625, + "logps/rejected": -326.91339111328125, + "loss": 0.49877166748046875, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 0.7068697810173035, + "rewards/margins": 0.6336223483085632, + "rewards/rejected": 0.07324743270874023, + "step": 155 + }, + { + "epoch": 1.3698924731182796, + "grad_norm": 1.483849048614502, + "learning_rate": 1.8285714285714288e-06, + "logits/chosen": -5.356790065765381, + "logits/rejected": -5.103802680969238, + "logps/chosen": -294.2845153808594, + "logps/rejected": -274.7852783203125, + "loss": 0.49851350784301757, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": 0.6102481484413147, + "rewards/margins": 0.5510420203208923, + "rewards/rejected": 0.05920610576868057, + "step": 160 + }, + { + "epoch": 1.4129032258064516, + "grad_norm": 1.3165900707244873, + "learning_rate": 1.780952380952381e-06, + "logits/chosen": -5.431517124176025, + "logits/rejected": -5.376145362854004, + "logps/chosen": -325.1388244628906, + "logps/rejected": -327.6669616699219, + "loss": 0.4309373378753662, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 0.6497762799263, + "rewards/margins": 0.9170367121696472, + "rewards/rejected": -0.2672604024410248, + "step": 165 + }, + { + "epoch": 1.4559139784946238, + "grad_norm": 1.5377726554870605, + "learning_rate": 1.7333333333333332e-06, + "logits/chosen": -5.27555513381958, + "logits/rejected": -5.022242069244385, + "logps/chosen": -340.17779541015625, + "logps/rejected": -288.0126647949219, + "loss": 0.4575087547302246, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 0.5721868276596069, + "rewards/margins": 0.6390407085418701, + "rewards/rejected": -0.06685388088226318, + "step": 170 + }, + { + "epoch": 1.4989247311827958, + "grad_norm": 2.0849504470825195, + "learning_rate": 1.6857142857142858e-06, + "logits/chosen": -5.070017337799072, + "logits/rejected": -5.327781677246094, + "logps/chosen": -360.60809326171875, + "logps/rejected": -292.8214416503906, + "loss": 0.45406789779663087, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 0.748975396156311, + "rewards/margins": 0.6336467266082764, + "rewards/rejected": 0.11532865464687347, + "step": 175 + }, + { + "epoch": 1.5419354838709678, + "grad_norm": 1.4946179389953613, + "learning_rate": 1.638095238095238e-06, + "logits/chosen": -5.378829002380371, + "logits/rejected": -5.2273030281066895, + "logps/chosen": -369.40679931640625, + "logps/rejected": -310.7534484863281, + "loss": 0.40073528289794924, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9242110252380371, + "rewards/margins": 0.8946256637573242, + "rewards/rejected": 0.029585417360067368, + "step": 180 + }, + { + "epoch": 1.5849462365591398, + "grad_norm": 1.9285597801208496, + "learning_rate": 1.5904761904761906e-06, + "logits/chosen": -5.534226417541504, + "logits/rejected": -5.36181640625, + "logps/chosen": -285.0791931152344, + "logps/rejected": -258.01654052734375, + "loss": 0.44419097900390625, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 0.7649332284927368, + "rewards/margins": 0.6902278661727905, + "rewards/rejected": 0.07470535486936569, + "step": 185 + }, + { + "epoch": 1.6279569892473118, + "grad_norm": 1.4278947114944458, + "learning_rate": 1.5428571428571428e-06, + "logits/chosen": -5.1719889640808105, + "logits/rejected": -5.186745643615723, + "logps/chosen": -366.37286376953125, + "logps/rejected": -310.18048095703125, + "loss": 0.37736806869506834, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.4671168327331543, + "rewards/margins": 1.110413908958435, + "rewards/rejected": 0.3567030429840088, + "step": 190 + }, + { + "epoch": 1.6709677419354838, + "grad_norm": 1.7930585145950317, + "learning_rate": 1.4952380952380954e-06, + "logits/chosen": -5.458104133605957, + "logits/rejected": -5.514155864715576, + "logps/chosen": -288.04083251953125, + "logps/rejected": -311.49090576171875, + "loss": 0.43456592559814455, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 0.7255552411079407, + "rewards/margins": 0.843337893486023, + "rewards/rejected": -0.11778266727924347, + "step": 195 + }, + { + "epoch": 1.7139784946236558, + "grad_norm": 1.3665006160736084, + "learning_rate": 1.4476190476190478e-06, + "logits/chosen": -5.221610069274902, + "logits/rejected": -5.051304340362549, + "logps/chosen": -379.436279296875, + "logps/rejected": -294.8427734375, + "loss": 0.3780463218688965, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 1.0199247598648071, + "rewards/margins": 1.0175530910491943, + "rewards/rejected": 0.0023716867435723543, + "step": 200 + }, + { + "epoch": 1.7569892473118278, + "grad_norm": 1.2653045654296875, + "learning_rate": 1.4000000000000001e-06, + "logits/chosen": -5.128909111022949, + "logits/rejected": -4.9968366622924805, + "logps/chosen": -374.5747375488281, + "logps/rejected": -310.3750915527344, + "loss": 0.42014646530151367, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 1.18593430519104, + "rewards/margins": 1.1055948734283447, + "rewards/rejected": 0.08033928275108337, + "step": 205 + }, + { + "epoch": 1.8, + "grad_norm": 1.0835349559783936, + "learning_rate": 1.3523809523809525e-06, + "logits/chosen": -5.398374557495117, + "logits/rejected": -5.446703910827637, + "logps/chosen": -332.9021911621094, + "logps/rejected": -307.1310119628906, + "loss": 0.4112071990966797, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 1.0410544872283936, + "rewards/margins": 1.0919562578201294, + "rewards/rejected": -0.05090172961354256, + "step": 210 + }, + { + "epoch": 1.843010752688172, + "grad_norm": 1.8526785373687744, + "learning_rate": 1.3047619047619047e-06, + "logits/chosen": -5.290182113647461, + "logits/rejected": -5.226934909820557, + "logps/chosen": -376.863525390625, + "logps/rejected": -320.2511291503906, + "loss": 0.4023551940917969, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 1.0466766357421875, + "rewards/margins": 0.915771484375, + "rewards/rejected": 0.1309051215648651, + "step": 215 + }, + { + "epoch": 1.886021505376344, + "grad_norm": 1.6254379749298096, + "learning_rate": 1.2571428571428571e-06, + "logits/chosen": -5.433383464813232, + "logits/rejected": -5.424604892730713, + "logps/chosen": -276.48382568359375, + "logps/rejected": -254.852294921875, + "loss": 0.42627677917480467, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 1.1158783435821533, + "rewards/margins": 0.8330133557319641, + "rewards/rejected": 0.2828650176525116, + "step": 220 + }, + { + "epoch": 1.9290322580645163, + "grad_norm": 3.121758460998535, + "learning_rate": 1.2095238095238095e-06, + "logits/chosen": -5.485299110412598, + "logits/rejected": -5.462108612060547, + "logps/chosen": -276.477294921875, + "logps/rejected": -285.85516357421875, + "loss": 0.4259671688079834, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": 0.8818387985229492, + "rewards/margins": 0.7649229764938354, + "rewards/rejected": 0.11691585928201675, + "step": 225 + }, + { + "epoch": 1.9720430107526883, + "grad_norm": 2.3801393508911133, + "learning_rate": 1.161904761904762e-06, + "logits/chosen": -5.211794376373291, + "logits/rejected": -4.747314929962158, + "logps/chosen": -416.1683654785156, + "logps/rejected": -365.8246765136719, + "loss": 0.3910404443740845, + "rewards/accuracies": 0.925000011920929, + "rewards/chosen": 1.5805695056915283, + "rewards/margins": 1.2381842136383057, + "rewards/rejected": 0.34238511323928833, + "step": 230 + }, + { + "epoch": 2.0086021505376346, + "grad_norm": 1.0709431171417236, + "learning_rate": 1.1142857142857143e-06, + "logits/chosen": -5.255922794342041, + "logits/rejected": -5.191451549530029, + "logps/chosen": -329.1349182128906, + "logps/rejected": -281.3226623535156, + "loss": 0.4125385761260986, + "rewards/accuracies": 0.970588207244873, + "rewards/chosen": 1.2619037628173828, + "rewards/margins": 1.028172254562378, + "rewards/rejected": 0.2337314933538437, + "step": 235 + }, + { + "epoch": 2.0516129032258066, + "grad_norm": 1.131330966949463, + "learning_rate": 1.0666666666666667e-06, + "logits/chosen": -5.735346794128418, + "logits/rejected": -5.384338855743408, + "logps/chosen": -354.9601135253906, + "logps/rejected": -285.20147705078125, + "loss": 0.3143571138381958, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.6434673070907593, + "rewards/margins": 1.7338101863861084, + "rewards/rejected": -0.09034281969070435, + "step": 240 + }, + { + "epoch": 2.0946236559139786, + "grad_norm": 1.3222469091415405, + "learning_rate": 1.019047619047619e-06, + "logits/chosen": -5.4371256828308105, + "logits/rejected": -5.41799259185791, + "logps/chosen": -318.0994567871094, + "logps/rejected": -296.23126220703125, + "loss": 0.3331931114196777, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9413619041442871, + "rewards/margins": 1.1318610906600952, + "rewards/rejected": -0.1904991865158081, + "step": 245 + }, + { + "epoch": 2.1376344086021506, + "grad_norm": 1.1309553384780884, + "learning_rate": 9.714285714285715e-07, + "logits/chosen": -5.32918643951416, + "logits/rejected": -5.228451251983643, + "logps/chosen": -386.4266662597656, + "logps/rejected": -299.82586669921875, + "loss": 0.30677978992462157, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 1.5374553203582764, + "rewards/margins": 1.2936350107192993, + "rewards/rejected": 0.24382023513317108, + "step": 250 + }, + { + "epoch": 2.1806451612903226, + "grad_norm": 1.2125121355056763, + "learning_rate": 9.238095238095239e-07, + "logits/chosen": -5.294185638427734, + "logits/rejected": -5.28645658493042, + "logps/chosen": -297.06683349609375, + "logps/rejected": -259.61456298828125, + "loss": 0.35644917488098143, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 1.0588313341140747, + "rewards/margins": 0.9543617367744446, + "rewards/rejected": 0.10446955263614655, + "step": 255 + }, + { + "epoch": 2.2236559139784946, + "grad_norm": 1.6852556467056274, + "learning_rate": 8.761904761904763e-07, + "logits/chosen": -5.6651225090026855, + "logits/rejected": -5.341966152191162, + "logps/chosen": -343.6695556640625, + "logps/rejected": -288.97869873046875, + "loss": 0.3159534454345703, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 1.1598581075668335, + "rewards/margins": 1.1884613037109375, + "rewards/rejected": -0.02860334888100624, + "step": 260 + }, + { + "epoch": 2.2666666666666666, + "grad_norm": 1.3867968320846558, + "learning_rate": 8.285714285714287e-07, + "logits/chosen": -5.487370491027832, + "logits/rejected": -5.273520469665527, + "logps/chosen": -370.5751037597656, + "logps/rejected": -309.1852722167969, + "loss": 0.2891366720199585, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.5521111488342285, + "rewards/margins": 1.3738138675689697, + "rewards/rejected": 0.17829741537570953, + "step": 265 + }, + { + "epoch": 2.3096774193548386, + "grad_norm": 1.3379493951797485, + "learning_rate": 7.80952380952381e-07, + "logits/chosen": -5.195990085601807, + "logits/rejected": -5.239910125732422, + "logps/chosen": -313.62939453125, + "logps/rejected": -252.7853546142578, + "loss": 0.32800557613372805, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.3296881914138794, + "rewards/margins": 1.1511423587799072, + "rewards/rejected": 0.1785457581281662, + "step": 270 + }, + { + "epoch": 2.3526881720430106, + "grad_norm": 1.2220584154129028, + "learning_rate": 7.333333333333333e-07, + "logits/chosen": -5.423740386962891, + "logits/rejected": -5.007106781005859, + "logps/chosen": -338.8307189941406, + "logps/rejected": -317.20355224609375, + "loss": 0.2776246786117554, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.2019110918045044, + "rewards/margins": 1.4179589748382568, + "rewards/rejected": -0.21604792773723602, + "step": 275 + }, + { + "epoch": 2.3956989247311826, + "grad_norm": 1.2976250648498535, + "learning_rate": 6.857142857142857e-07, + "logits/chosen": -5.235350608825684, + "logits/rejected": -5.151061534881592, + "logps/chosen": -311.9940185546875, + "logps/rejected": -273.13653564453125, + "loss": 0.29607300758361815, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1353685855865479, + "rewards/margins": 1.4848861694335938, + "rewards/rejected": -0.34951773285865784, + "step": 280 + }, + { + "epoch": 2.4387096774193546, + "grad_norm": 1.5546081066131592, + "learning_rate": 6.380952380952381e-07, + "logits/chosen": -5.2240986824035645, + "logits/rejected": -5.1968793869018555, + "logps/chosen": -343.4505920410156, + "logps/rejected": -266.83563232421875, + "loss": 0.3283820152282715, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.146566390991211, + "rewards/margins": 1.1784019470214844, + "rewards/rejected": -0.03183561936020851, + "step": 285 + }, + { + "epoch": 2.481720430107527, + "grad_norm": 1.9226080179214478, + "learning_rate": 5.904761904761905e-07, + "logits/chosen": -5.224617958068848, + "logits/rejected": -5.07640266418457, + "logps/chosen": -368.66729736328125, + "logps/rejected": -335.21917724609375, + "loss": 0.31503658294677733, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 1.1312525272369385, + "rewards/margins": 1.1962707042694092, + "rewards/rejected": -0.0650181695818901, + "step": 290 + }, + { + "epoch": 2.524731182795699, + "grad_norm": 1.0341100692749023, + "learning_rate": 5.428571428571429e-07, + "logits/chosen": -5.5554656982421875, + "logits/rejected": -5.393430709838867, + "logps/chosen": -295.96405029296875, + "logps/rejected": -290.5457458496094, + "loss": 0.2795746326446533, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 1.1615794897079468, + "rewards/margins": 1.2998874187469482, + "rewards/rejected": -0.1383078545331955, + "step": 295 + }, + { + "epoch": 2.567741935483871, + "grad_norm": 1.2715569734573364, + "learning_rate": 4.952380952380952e-07, + "logits/chosen": -5.555523872375488, + "logits/rejected": -5.402945518493652, + "logps/chosen": -335.7245788574219, + "logps/rejected": -330.8264465332031, + "loss": 0.2791964292526245, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.0191905498504639, + "rewards/margins": 1.5995515584945679, + "rewards/rejected": -0.580361008644104, + "step": 300 + }, + { + "epoch": 2.610752688172043, + "grad_norm": 1.1054896116256714, + "learning_rate": 4.4761904761904764e-07, + "logits/chosen": -5.346233367919922, + "logits/rejected": -5.2186713218688965, + "logps/chosen": -344.9323425292969, + "logps/rejected": -347.8134765625, + "loss": 0.2980963945388794, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1464207172393799, + "rewards/margins": 1.3599145412445068, + "rewards/rejected": -0.21349389851093292, + "step": 305 + }, + { + "epoch": 2.653763440860215, + "grad_norm": 0.9144394397735596, + "learning_rate": 4e-07, + "logits/chosen": -5.2957868576049805, + "logits/rejected": -5.262829780578613, + "logps/chosen": -359.19464111328125, + "logps/rejected": -328.304443359375, + "loss": 0.25096189975738525, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.198318362236023, + "rewards/margins": 1.7069896459579468, + "rewards/rejected": -0.5086712837219238, + "step": 310 + }, + { + "epoch": 2.696774193548387, + "grad_norm": 0.8603355884552002, + "learning_rate": 3.523809523809524e-07, + "logits/chosen": -5.491151809692383, + "logits/rejected": -5.42437219619751, + "logps/chosen": -367.32843017578125, + "logps/rejected": -313.4622497558594, + "loss": 0.2945852279663086, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 1.4304109811782837, + "rewards/margins": 1.554801106452942, + "rewards/rejected": -0.12439011037349701, + "step": 315 + }, + { + "epoch": 2.739784946236559, + "grad_norm": 1.1267086267471313, + "learning_rate": 3.0476190476190477e-07, + "logits/chosen": -5.442085266113281, + "logits/rejected": -5.499457359313965, + "logps/chosen": -298.1466979980469, + "logps/rejected": -318.5505676269531, + "loss": 0.36453125476837156, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 0.9415658712387085, + "rewards/margins": 1.0582208633422852, + "rewards/rejected": -0.11665502935647964, + "step": 320 + }, + { + "epoch": 2.782795698924731, + "grad_norm": 1.8089045286178589, + "learning_rate": 2.5714285714285716e-07, + "logits/chosen": -5.749306678771973, + "logits/rejected": -5.6225762367248535, + "logps/chosen": -254.8236541748047, + "logps/rejected": -271.09698486328125, + "loss": 0.2935594081878662, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 0.8951157331466675, + "rewards/margins": 1.194537878036499, + "rewards/rejected": -0.2994222044944763, + "step": 325 + }, + { + "epoch": 2.825806451612903, + "grad_norm": 1.1241213083267212, + "learning_rate": 2.0952380952380953e-07, + "logits/chosen": -5.630118370056152, + "logits/rejected": -5.584993362426758, + "logps/chosen": -267.04742431640625, + "logps/rejected": -239.6341094970703, + "loss": 0.30673086643218994, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.19244384765625, + "rewards/margins": 1.149562120437622, + "rewards/rejected": 0.04288160055875778, + "step": 330 + }, + { + "epoch": 2.868817204301075, + "grad_norm": 1.8405497074127197, + "learning_rate": 1.6190476190476192e-07, + "logits/chosen": -5.395017623901367, + "logits/rejected": -5.194415092468262, + "logps/chosen": -344.854736328125, + "logps/rejected": -338.8992919921875, + "loss": 0.2845343351364136, + "rewards/accuracies": 1.0, + "rewards/chosen": 1.1038144826889038, + "rewards/margins": 1.5797145366668701, + "rewards/rejected": -0.4759000241756439, + "step": 335 + }, + { + "epoch": 2.9118279569892476, + "grad_norm": 1.088416576385498, + "learning_rate": 1.142857142857143e-07, + "logits/chosen": -5.30595588684082, + "logits/rejected": -5.5908002853393555, + "logps/chosen": -325.8397521972656, + "logps/rejected": -315.6219177246094, + "loss": 0.2914729595184326, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": 1.364817500114441, + "rewards/margins": 1.4060461521148682, + "rewards/rejected": -0.04122857004404068, + "step": 340 + }, + { + "epoch": 2.9548387096774196, + "grad_norm": 1.6547528505325317, + "learning_rate": 6.666666666666667e-08, + "logits/chosen": -5.339937686920166, + "logits/rejected": -5.211861610412598, + "logps/chosen": -356.51739501953125, + "logps/rejected": -403.10015869140625, + "loss": 0.36704728603363035, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": 1.14650297164917, + "rewards/margins": 1.1948604583740234, + "rewards/rejected": -0.04835757985711098, + "step": 345 + }, + { + "epoch": 2.9978494623655916, + "grad_norm": 1.088441014289856, + "learning_rate": 1.9047619047619048e-08, + "logits/chosen": -5.6740312576293945, + "logits/rejected": -5.567724704742432, + "logps/chosen": -295.913330078125, + "logps/rejected": -332.6319274902344, + "loss": 0.27830004692077637, + "rewards/accuracies": 1.0, + "rewards/chosen": 0.9771059155464172, + "rewards/margins": 1.485169768333435, + "rewards/rejected": -0.5080639123916626, + "step": 350 + } + ], + "logging_steps": 5, + "max_steps": 351, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 50, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-9b-dpo/checkpoint-351/training_args.bin b/gemma-9b-dpo/checkpoint-351/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3dd4e898c665a87974fee402a2f65954466af4f7 --- /dev/null +++ b/gemma-9b-dpo/checkpoint-351/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74ee585106626f1196ffc9833586c5c11146fc384e5b33ed522cf45bab148032 +size 6097 diff --git a/gemma-9b-dpo/checkpoint-50/README.md b/gemma-9b-dpo/checkpoint-50/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a367db0a4fe66cceaf15eef8697f5b4480f48fd3 --- /dev/null +++ b/gemma-9b-dpo/checkpoint-50/README.md @@ -0,0 +1,209 @@ +--- +base_model: google/gemma-2-9b-it +library_name: peft +pipeline_tag: text-generation +tags: +- base_model:adapter:google/gemma-2-9b-it +- dpo +- lora +- transformers +- trl +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.18.1 \ No newline at end of file diff --git a/gemma-9b-dpo/checkpoint-50/adapter_config.json b/gemma-9b-dpo/checkpoint-50/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..c80c202b52eb25538c92a8e095abe785cdc6f749 --- /dev/null +++ b/gemma-9b-dpo/checkpoint-50/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": null, + "base_model_name_or_path": "google/gemma-2-9b-it", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 32, + "lora_bias": false, + "lora_dropout": 0.05, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": null, + "peft_type": "LORA", + "peft_version": "0.18.1", + "qalora_group_size": 16, + "r": 16, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "down_proj", + "v_proj", + "k_proj", + "up_proj", + "q_proj", + "gate_proj", + "o_proj" + ], + "target_parameters": null, + "task_type": "CAUSAL_LM", + "trainable_token_indices": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/gemma-9b-dpo/checkpoint-50/adapter_model.safetensors b/gemma-9b-dpo/checkpoint-50/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..94b515604ec92f4a5389fbdf1d04e0048a931a0f --- /dev/null +++ b/gemma-9b-dpo/checkpoint-50/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bfebe224dde13dbac638df38792be50e5c742449a9f48714934f72678ce0db33 +size 216151256 diff --git a/gemma-9b-dpo/checkpoint-50/chat_template.jinja b/gemma-9b-dpo/checkpoint-50/chat_template.jinja new file mode 100644 index 0000000000000000000000000000000000000000..923ec253c8dbefbb41cf084db7251df41d000f6d --- /dev/null +++ b/gemma-9b-dpo/checkpoint-50/chat_template.jinja @@ -0,0 +1,4 @@ +{{ bos_token }}{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '' + role + ' +' + message['content'] | trim + ' +' }}{% endfor %}{% if add_generation_prompt %}{{'model +'}}{% endif %} \ No newline at end of file diff --git a/gemma-9b-dpo/checkpoint-50/optimizer.pt b/gemma-9b-dpo/checkpoint-50/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..ece6ad8b5024992207bf4bfc6654cd1fabea145f --- /dev/null +++ b/gemma-9b-dpo/checkpoint-50/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e7632371c4db2da8af7b342a1972fedc7c4b921d10620a5a776e3342743b5f0 +size 110425877 diff --git a/gemma-9b-dpo/checkpoint-50/rng_state.pth b/gemma-9b-dpo/checkpoint-50/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..b059ca2a76606ab54a2502b2b3c9150c6b08b6eb --- /dev/null +++ b/gemma-9b-dpo/checkpoint-50/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:250560ab3d528161ab3659b120def6e4a9ab4b457e3399603bbcfa40db3efc90 +size 14645 diff --git a/gemma-9b-dpo/checkpoint-50/scheduler.pt b/gemma-9b-dpo/checkpoint-50/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..fb10a23b98c9694c5e3c32dbd15bf0934f7d70d8 --- /dev/null +++ b/gemma-9b-dpo/checkpoint-50/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2dfc14798f09558da03215703cd567e4d98cc190f06186e9b5fd2ef6fa27b4dc +size 1465 diff --git a/gemma-9b-dpo/checkpoint-50/tokenizer.json b/gemma-9b-dpo/checkpoint-50/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..6523cc0616b64c563af913a417dfa7eb01549a2c --- /dev/null +++ b/gemma-9b-dpo/checkpoint-50/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:394ace002a144ac6ad5486387502f2d36f70c087310c3d907857240c76fcb36e +size 34362748 diff --git a/gemma-9b-dpo/checkpoint-50/tokenizer_config.json b/gemma-9b-dpo/checkpoint-50/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..eabc4ed8ab34751069a1970d61e616ce93e53880 --- /dev/null +++ b/gemma-9b-dpo/checkpoint-50/tokenizer_config.json @@ -0,0 +1,19 @@ +{ + "backend": "tokenizers", + "bos_token": "", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "extra_special_tokens": [ + "", + "" + ], + "is_local": false, + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/gemma-9b-dpo/checkpoint-50/trainer_state.json b/gemma-9b-dpo/checkpoint-50/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b57260504436155bc46a874c07cc010289c72456 --- /dev/null +++ b/gemma-9b-dpo/checkpoint-50/trainer_state.json @@ -0,0 +1,184 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.43010752688172044, + "eval_steps": 500, + "global_step": 50, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.043010752688172046, + "grad_norm": 1.6562875509262085, + "learning_rate": 3.333333333333333e-07, + "logits/chosen": -5.94815731048584, + "logits/rejected": -5.856410503387451, + "logps/chosen": -302.07794189453125, + "logps/rejected": -283.5802001953125, + "loss": 0.6852486610412598, + "rewards/accuracies": 0.375, + "rewards/chosen": 0.01349079143255949, + "rewards/margins": 0.017757166177034378, + "rewards/rejected": -0.00426637614145875, + "step": 5 + }, + { + "epoch": 0.08602150537634409, + "grad_norm": 1.7723957300186157, + "learning_rate": 7.5e-07, + "logits/chosen": -5.585428714752197, + "logits/rejected": -5.577895641326904, + "logps/chosen": -362.89239501953125, + "logps/rejected": -280.8010559082031, + "loss": 0.6844330310821534, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.018745478242635727, + "rewards/margins": 0.018739622086286545, + "rewards/rejected": 5.85438692723983e-06, + "step": 10 + }, + { + "epoch": 0.12903225806451613, + "grad_norm": 1.6630758047103882, + "learning_rate": 1.1666666666666668e-06, + "logits/chosen": -5.916059494018555, + "logits/rejected": -5.797668933868408, + "logps/chosen": -307.4602966308594, + "logps/rejected": -316.79803466796875, + "loss": 0.7070020198822021, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.008706416003406048, + "rewards/margins": -0.02138950116932392, + "rewards/rejected": 0.012683087959885597, + "step": 15 + }, + { + "epoch": 0.17204301075268819, + "grad_norm": 5.225494861602783, + "learning_rate": 1.5833333333333333e-06, + "logits/chosen": -5.818185329437256, + "logits/rejected": -5.638014793395996, + "logps/chosen": -329.3303527832031, + "logps/rejected": -292.2645568847656, + "loss": 0.6929943561553955, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": -0.044999174773693085, + "rewards/margins": 0.00216915225610137, + "rewards/rejected": -0.04716832935810089, + "step": 20 + }, + { + "epoch": 0.21505376344086022, + "grad_norm": 2.5229244232177734, + "learning_rate": 2e-06, + "logits/chosen": -5.8189592361450195, + "logits/rejected": -5.646462917327881, + "logps/chosen": -311.9922180175781, + "logps/rejected": -297.0098571777344, + "loss": 0.7002316951751709, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": 0.0544702522456646, + "rewards/margins": -0.012132339179515839, + "rewards/rejected": 0.06660258769989014, + "step": 25 + }, + { + "epoch": 0.25806451612903225, + "grad_norm": 1.6524324417114258, + "learning_rate": 2.4166666666666667e-06, + "logits/chosen": -5.696784496307373, + "logits/rejected": -5.6943678855896, + "logps/chosen": -350.2938537597656, + "logps/rejected": -322.43182373046875, + "loss": 0.699475908279419, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.0020792910363525152, + "rewards/margins": 0.02348965033888817, + "rewards/rejected": -0.025568943470716476, + "step": 30 + }, + { + "epoch": 0.3010752688172043, + "grad_norm": 9.075493812561035, + "learning_rate": 2.8333333333333335e-06, + "logits/chosen": -5.407717704772949, + "logits/rejected": -5.5642805099487305, + "logps/chosen": -391.3837585449219, + "logps/rejected": -333.9232482910156, + "loss": 0.6926415920257568, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": -0.026233959943056107, + "rewards/margins": 0.00582819152623415, + "rewards/rejected": -0.03206215053796768, + "step": 35 + }, + { + "epoch": 0.34408602150537637, + "grad_norm": 2.6282100677490234, + "learning_rate": 2.9714285714285716e-06, + "logits/chosen": -5.649778366088867, + "logits/rejected": -5.512002944946289, + "logps/chosen": -338.31048583984375, + "logps/rejected": -279.06536865234375, + "loss": 0.6739111423492432, + "rewards/accuracies": 0.625, + "rewards/chosen": 0.04578382521867752, + "rewards/margins": 0.042618148028850555, + "rewards/rejected": 0.0031656839419156313, + "step": 40 + }, + { + "epoch": 0.3870967741935484, + "grad_norm": 16.96689796447754, + "learning_rate": 2.923809523809524e-06, + "logits/chosen": -5.518254280090332, + "logits/rejected": -5.577255725860596, + "logps/chosen": -374.29449462890625, + "logps/rejected": -366.4861145019531, + "loss": 0.7162120819091797, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": 0.03416462987661362, + "rewards/margins": 0.0036566159687936306, + "rewards/rejected": 0.030508000403642654, + "step": 45 + }, + { + "epoch": 0.43010752688172044, + "grad_norm": 1.447816252708435, + "learning_rate": 2.8761904761904764e-06, + "logits/chosen": -5.831571102142334, + "logits/rejected": -5.802765846252441, + "logps/chosen": -316.0965881347656, + "logps/rejected": -269.0494384765625, + "loss": 0.6838803291320801, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.1265859305858612, + "rewards/margins": 0.026753634214401245, + "rewards/rejected": -0.15333956480026245, + "step": 50 + } + ], + "logging_steps": 5, + "max_steps": 351, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 50, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +} diff --git a/gemma-9b-dpo/checkpoint-50/training_args.bin b/gemma-9b-dpo/checkpoint-50/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3dd4e898c665a87974fee402a2f65954466af4f7 --- /dev/null +++ b/gemma-9b-dpo/checkpoint-50/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74ee585106626f1196ffc9833586c5c11146fc384e5b33ed522cf45bab148032 +size 6097 diff --git a/gemma-9b-dpo/dpo_training_info.json b/gemma-9b-dpo/dpo_training_info.json new file mode 100644 index 0000000000000000000000000000000000000000..1726c10c6fe91442f784b00884539c92867e1925 --- /dev/null +++ b/gemma-9b-dpo/dpo_training_info.json @@ -0,0 +1,10 @@ +{ + "model_id": "google/gemma-2-9b-it", + "base_model": "google/gemma-2-9b-it", + "dpo_data": "data/distillation/dpo_medgemma_450_multipair_fixed.jsonl", + "n_pairs": 930, + "epochs": 3, + "learning_rate": 3e-06, + "beta": 0.1, + "timestamp": "2026-02-21T03:29:38.436984" +} \ No newline at end of file diff --git a/gemma-9b-dpo/training_args.bin b/gemma-9b-dpo/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..3dd4e898c665a87974fee402a2f65954466af4f7 --- /dev/null +++ b/gemma-9b-dpo/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74ee585106626f1196ffc9833586c5c11146fc384e5b33ed522cf45bab148032 +size 6097