Miykaelxxm committed
Commit 1c9c2e3 · verified · 1 parent: 67344ab

Upload 3 files

Files changed (3)
  1. .gitattributes +1 -0
  2. README.md +25 -0
  3. config.json +41 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
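The one added attribute line routes tokenizer.json through Git LFS, matching the existing *.zip / *.zst / *tfevents* rules. For reference, the same line is what `git lfs track` appends when run inside a clone of the repo (a minimal sketch, assuming Git LFS is installed and initialized):

```bash
# Appends "tokenizer.json filter=lfs diff=lfs merge=lfs -text" to .gitattributes
git lfs track "tokenizer.json"

# Stage the attribute change together with the file so it is stored as an LFS object
git add .gitattributes tokenizer.json
```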
README.md ADDED
@@ -0,0 +1,25 @@
+ ---
+ license: apache-2.0
+ language:
+ - zh
+ - en
+ tasks:
+ - Large Language Models
+ model_features:
+ - 128k Context
+ ---
+
+ [ERNIEKit](https://github.com/PaddlePaddle/ERNIE) is a training toolkit based on PaddlePaddle, specifically designed for the ERNIE series of open-source large models. It provides comprehensive support for scenarios such as instruction fine-tuning (SFT, LoRA) and alignment training (DPO), ensuring optimal performance.
+
+ Usage Examples:
+
+ ```bash
+ # Download model
+ huggingface-cli download baidu/ERNIE-4.5-300B-A47B-Base-Paddle --local-dir baidu/ERNIE-4.5-300B-A47B-Base-Paddle
+ # SFT
+ erniekit train examples/configs/ERNIE-4.5-300B-A47B/sft/run_sft_wint8mix_lora_8k.yaml model_name_or_path=baidu/ERNIE-4.5-300B-A47B-Base-Paddle
+ # DPO
+ erniekit train examples/configs/ERNIE-4.5-300B-A47B/dpo/run_dpo_wint8mix_lora_8k.yaml model_name_or_path=baidu/ERNIE-4.5-300B-A47B-Base-Paddle
+ ```
+
+ For more detailed examples, including SFT with LoRA, multi-GPU configurations, and advanced scripts, please refer to the examples folder within the [ERNIEKit](https://github.com/PaddlePaddle/ERNIE) repository.
config.json ADDED
@@ -0,0 +1,41 @@
+ {
+   "_attn_implementation": "eager",
+   "architectures": [
+     "Ernie4_5_MoeForCausalLM"
+   ],
+   "auto_map": {
+     "AutoConfig": "configuration_ernie4_5_moe.Ernie4_5_MoeConfig",
+     "AutoModel": "modeling_ernie4_5_moe.Ernie4_5_Model",
+     "AutoModelForCausalLM": "modeling_ernie4_5_moe.Ernie4_5_MoeForCausalLM"
+   },
+   "bos_token_id": 1,
+   "eos_token_id": 2,
+   "hidden_act": "silu",
+   "hidden_size": 8192,
+   "intermediate_size": 28672,
+   "max_position_embeddings": 131072,
+   "model_type": "ernie4_5_moe",
+   "moe_capacity": [
+     64,
+     64,
+     64
+   ],
+   "moe_gate": "topk",
+   "moe_intermediate_size": 3584,
+   "moe_k": 8,
+   "moe_layer_interval": 1,
+   "moe_layer_start_index": 3,
+   "moe_num_experts": 64,
+   "moe_use_aux_free": true,
+   "num_attention_heads": 64,
+   "num_hidden_layers": 54,
+   "num_key_value_heads": 8,
+   "pad_token_id": 0,
+   "rms_norm_eps": 1e-05,
+   "rope_theta": 500000,
+   "tie_word_embeddings": false,
+   "torch_dtype": "bfloat16",
+   "use_bias": false,
+   "use_cache": true,
+   "vocab_size": 103424
+ }
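The config describes a 54-layer MoE decoder (64 experts per MoE layer, top-8 routing, MoE starting at layer 3) with a 131,072-token context window and grouped-query attention (64 attention heads, 8 KV heads). As a quick way to eyeball those fields after downloading the repo, the values can be pulled straight out of config.json; this is a sketch that assumes jq is available locally:

```bash
# Print the main architecture fields from the downloaded config.json
jq '{model_type, hidden_size, num_hidden_layers, num_attention_heads,
     num_key_value_heads, moe_num_experts, moe_k, moe_layer_start_index,
     max_position_embeddings, vocab_size, torch_dtype}' config.json
```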