"""Fine-tune Llama-3.1-8B-Instruct with an Unsloth LoRA adapter and upload it.

Pipeline, in order:
  1. start a Weights & Biases run,
  2. load the 4-bit base model and attach LoRA adapters,
  3. load and shuffle the ShareGPT-style dataset and render it with the
     "llama-3" chat template,
  4. train with TRL's SFTTrainer,
  5. save the merged 16-bit model and upload it to a private Hub repo.

Authentication: the Hugging Face token is read from the ``HF_TOKEN``
environment variable. When it is unset, ``None`` is passed so the libraries
fall back to the locally cached login.
"""
import os
import random

import torch
from huggingface_hub import HfApi
from huggingface_hub import create_repo
from unsloth import FastLanguageModel
from datasets import load_dataset

max_seq_length = 2048
dtype = None
load_in_4bit = True
repo_name = "instruct-v19"

# Paths / identifiers that are used in more than one place below.
output_dir = "outputs/lora-out-8b"
merged_dir = f"{output_dir}/merged"
hub_repo_id = f"jic062/{repo_name}"

# Never hard-code credentials: take the token from the environment.
# An empty/unset variable becomes None (= use the cached login).
hf_token = os.environ.get("HF_TOKEN") or None

# do wandb stuff
# NOTE(review): this and the other late imports below are kept at their
# original positions relative to the unsloth import/setup calls — presumably
# import order matters for unsloth's patching; confirm before hoisting them.
import wandb

wandb.init(
    project="unsloth_lora",
    name=repo_name,
)

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="meta-llama/Meta-Llama-3.1-8B-Instruct",  # mistralai/Mistral-Nemo-Instruct-2407
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
    token=hf_token,  # needed for gated models like meta-llama/Llama-2-7b-hf
)

model = FastLanguageModel.get_peft_model(
    model,
    r=64,  # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
    lora_alpha=16,
    lora_dropout=0,  # Supports any, but = 0 is optimized
    bias="none",  # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing="unsloth",  # True or "unsloth" for very long context
    random_state=3407,
    use_rslora=False,  # We support rank stabilized LoRA
    loftq_config=None,  # And LoftQ
)

dataset = load_dataset("Chaser-cz/ChaiTop100-SHAREGPT")

# The shuffle seed is drawn at random on every run (as before), but it is now
# recorded in the W&B run config so a given run can be reproduced later.
shuffle_seed = random.randint(1, 9999)
wandb.config.update({"shuffle_seed": shuffle_seed})
train_dataset = dataset["train"].shuffle(seed=shuffle_seed)

from unsloth.chat_templates import get_chat_template

tokenizer = get_chat_template(
    tokenizer,
    chat_template="llama-3",
    mapping={
        "role": "from",
        "content": "value",
        "user": "human",
        "assistant": "gpt",
    },  # ShareGPT style
)


def formatting_prompts_func(examples):
    """Render a batch of conversations into chat-template strings.

    Args:
        examples: A batch from ``Dataset.map(..., batched=True)``; its
            ``"conversations"`` entry is iterated, one conversation per item.

    Returns:
        A dict ``{"text": [...]}`` holding one rendered (untokenized) string
        per conversation, with no generation prompt appended.
    """
    convos = examples["conversations"]
    texts = [
        tokenizer.apply_chat_template(
            convo, tokenize=False, add_generation_prompt=False
        )
        for convo in convos
    ]
    return {"text": texts}


train_dataset = train_dataset.map(formatting_prompts_func, batched=True)

from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    packing=False,  # Can make training 5x faster for short sequences.
    args=TrainingArguments(
        per_device_train_batch_size=2,
        gradient_accumulation_steps=32,
        warmup_steps=5,
        max_steps=1000,
        learning_rate=2.5e-4,
        # Exactly one of fp16 / bf16 is enabled, depending on hardware support.
        fp16=not is_bfloat16_supported(),
        bf16=is_bfloat16_supported(),
        logging_steps=1,
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="cosine",
        seed=3407,
        output_dir=output_dir,
        save_strategy="steps",
        save_steps=500,
    ),
)

trainer_stats = trainer.train()

model.save_pretrained_merged(
    merged_dir,
    tokenizer,
    save_method="merged_16bit",
)

api = HfApi()
# exist_ok=True: a repo left over from an earlier run must not abort the
# upload after training has already completed.
create_repo(
    hub_repo_id,
    repo_type="model",
    private=True,
    token=hf_token,
    exist_ok=True,
)
api.upload_folder(
    folder_path=merged_dir,
    repo_id=hub_repo_id,
    repo_type="model",
    token=hf_token,
)

wandb.finish()