|
|
--- |
|
|
library_name: transformers |
|
|
license: apache-2.0 |
|
|
datasets: |
|
|
- zerofata/Roleplay-Anime-Characters |
|
|
- zerofata/Instruct-Anime-CreativeWriting |
|
|
- zerofata/Summaries-Anime-FandomPages |
|
|
- zerofata/Instruct-Anime |
|
|
base_model: |
|
|
- ConicCat/Mistral-Small-3.2-AntiRep-24B |
|
|
--- |
|
|
<!DOCTYPE html> |
|
|
<style> |
|
|
body { |
|
|
font-family: 'Georgia', 'Times New Roman', serif; |
|
|
color: #dce4f0; /* Soft off-white */ |
|
|
line-height: 1.6; |
|
|
margin: 0; |
|
|
padding: 0; |
|
|
background-color: #161a25; /* Deep blue from dark sky */ |
|
|
} |
|
|
|
|
|
.lemonade-text { |
|
|
color: #89d8ff; /* Bright blue from city lights */ |
|
|
position: relative; |
|
|
z-index: 2; |
|
|
margin-left: 0.2em; |
|
|
text-shadow: 0 0 15px #89d8ff; |
|
|
} |
|
|
|
|
|
/* Section styling */ |
|
|
.section-container { |
|
|
background-color: rgba(32, 40, 56, 0.7); /* Slightly transparent dark blue */ |
|
|
margin-bottom: 30px; |
|
|
position: relative; |
|
|
overflow: hidden; |
|
|
border-bottom: 1px solid #ff9966; /* Sunset orange */ |
|
|
box-shadow: 0 4px 15px rgba(255, 153, 102, 0.05); |
|
|
} |
|
|
|
|
|
.section-header { |
|
|
display: flex; |
|
|
align-items: center; |
|
|
background-color: rgba(255, 153, 102, 0.12); |
|
|
padding: 10px 20px; |
|
|
} |
|
|
|
|
|
.section-indicator { |
|
|
width: 8px; |
|
|
height: 20px; |
|
|
background-color: #ff9966; /* Sunset orange */ |
|
|
margin-right: 15px; |
|
|
box-shadow: 0 0 8px rgba(255, 153, 102, 0.2); |
|
|
} |
|
|
|
|
|
.section-title { |
|
|
font-family: 'Playfair Display', serif; /* Using the new font */ |
|
|
color: #ffb399; /* Lighter sunset shade */ |
|
|
font-size: 1.4rem; |
|
|
margin: 0; |
|
|
letter-spacing: 1px; |
|
|
font-weight: 400; |
|
|
text-transform: capitalize; |
|
|
} |
|
|
|
|
|
.section-content { |
|
|
padding: 20px; |
|
|
font-family: 'Crimson Text', serif; /* Using the new font */ |
|
|
color: #dce4f0; |
|
|
line-height: 1.6; |
|
|
} |
|
|
|
|
|
/* Title styling */ |
|
|
.title-container { |
|
|
background-color: #202838; |
|
|
position: relative; |
|
|
overflow: hidden; |
|
|
margin-bottom: 40px; |
|
|
border-left: 3px solid #ff9966; /* Sunset orange */ |
|
|
box-shadow: 0 6px 20px rgba(255, 153, 102, 0.07); |
|
|
} |
|
|
|
|
|
.title-wrapper { |
|
|
position: relative; |
|
|
z-index: 2; |
|
|
padding: 25px 20px 30px 30px; |
|
|
font-family: 'Playfair Display', serif; |
|
|
} |
|
|
|
|
|
.title-main { |
|
|
color: #ffb399; /* Lighter sunset shade */ |
|
|
font-size: 2.5rem; |
|
|
font-weight: 700; |
|
|
margin: 0; |
|
|
letter-spacing: 2px; |
|
|
display: inline-block; |
|
|
position: relative; |
|
|
text-transform: uppercase; |
|
|
} |
|
|
|
|
|
.title-prefix { |
|
|
position: relative; |
|
|
z-index: 2; |
|
|
} |
|
|
|
|
|
.title-subtitle { |
|
|
padding-left: 15px; |
|
|
margin-top: 5px; |
|
|
margin-left: 5px; |
|
|
} |
|
|
|
|
|
.subtitle-text { |
|
|
color: #a6c8e0; /* Muted sky blue */ |
|
|
font-size: 1.2rem; |
|
|
font-family: 'Crimson Text', serif; |
|
|
font-weight: 300; |
|
|
letter-spacing: 3px; |
|
|
text-transform: uppercase; |
|
|
display: inline-block; |
|
|
} |
|
|
|
|
|
.glitchy-overlay { |
|
|
position: absolute; |
|
|
top: 0; |
|
|
left: 0; |
|
|
width: 100%; |
|
|
height: 100%; |
|
|
background-image: repeating-linear-gradient(0deg, rgba(0,0,0,0) 0, rgba(137, 216, 255, 0.08) 1px, rgba(0,0,0,0) 2px); /* Rain effect with blue tint */ |
|
|
z-index: 1; |
|
|
} |
|
|
|
|
|
/* Data box styling */ |
|
|
.data-box { |
|
|
background-color: rgba(22, 26, 37, 0.6); |
|
|
padding: 15px; |
|
|
border-left: 2px solid #ff9966; /* Sunset orange */ |
|
|
margin-bottom: 20px; |
|
|
box-shadow: 0 2px 10px rgba(255, 153, 102, 0.05); |
|
|
} |
|
|
|
|
|
.data-row { |
|
|
display: flex; |
|
|
margin-bottom: 8px; |
|
|
} |
|
|
|
|
|
.data-arrow { |
|
|
color: #ff9966; /* Sunset orange */ |
|
|
width: 20px; |
|
|
display: inline-block; |
|
|
} |
|
|
|
|
|
.data-label { |
|
|
color: #a6c8e0; /* Muted sky blue */ |
|
|
width: 80px; |
|
|
display: inline-block; |
|
|
} |
|
|
|
|
|
/* Subheading styling */ |
|
|
.subheading { |
|
|
color: #a6c8e0; /* Muted sky blue */ |
|
|
font-size: 1.1rem; |
|
|
margin-top: 20px; |
|
|
margin-bottom: 15px; |
|
|
font-weight: 400; |
|
|
border-bottom: 1px dashed rgba(166, 200, 224, 0.4); |
|
|
display: inline-block; |
|
|
text-transform: uppercase; |
|
|
letter-spacing: 1px; |
|
|
font-family: 'Playfair Display', serif; |
|
|
} |
|
|
|
|
|
/* Links */ |
|
|
a { |
|
|
color: #89d8ff; /* Bright blue from city lights */ |
|
|
text-decoration: none; |
|
|
} |
|
|
|
|
|
a:hover { |
|
|
text-decoration: underline; |
|
|
color: #ffb399; /* Lighter sunset shade on hover */ |
|
|
} |
|
|
|
|
|
/* Container */ |
|
|
.container { |
|
|
max-width: 1200px; |
|
|
margin: 20px auto; |
|
|
padding: 40px 20px; |
|
|
background-color: #202838; /* Darker container background */ |
|
|
background-image: |
|
|
radial-gradient(circle at 20% 80%, rgba(255, 153, 102, 0.04) 0%, transparent 50%), /* Sunset glow */ |
|
|
radial-gradient(circle at 80% 20%, rgba(137, 216, 255, 0.04) 0%, transparent 50%), /* Blue glow */ |
|
|
radial-gradient(circle at 40% 40%, rgba(224, 230, 241, 0.02) 0%, transparent 50%); /* Faint cloud/light glow */ |
|
|
min-height: calc(100vh - 40px); |
|
|
border: 1px solid #ff9966; /* Sunset orange */ |
|
|
border-radius: 8px; |
|
|
box-shadow: 0 8px 32px rgba(255, 153, 102, 0.07); |
|
|
} |
|
|
|
|
|
/* Dropdown styling */ |
|
|
.dropdown-container { |
|
|
margin-top: 20px; |
|
|
} |
|
|
|
|
|
.dropdown-summary { |
|
|
cursor: pointer; |
|
|
padding: 10px 0; |
|
|
border-bottom: 1px dashed rgba(166, 200, 224, 0.4); |
|
|
color: #a6c8e0; /* Muted sky blue */ |
|
|
font-size: 1.1rem; |
|
|
font-weight: 400; |
|
|
text-transform: uppercase; |
|
|
letter-spacing: 1px; |
|
|
font-family: 'Playfair Display', serif; |
|
|
list-style: none; |
|
|
display: flex; |
|
|
align-items: center; |
|
|
} |
|
|
|
|
|
.dropdown-summary::-webkit-details-marker { |
|
|
display: none; |
|
|
} |
|
|
|
|
|
.dropdown-arrow { |
|
|
color: #ff9966; /* Sunset orange */ |
|
|
margin-right: 10px; |
|
|
transition: transform 0.3s ease; |
|
|
} |
|
|
|
|
|
/* Rotate the arrow while the dropdown is expanded. The "open" attribute is set on the nested <details> element, not on the .dropdown-container wrapper div, so it has to be matched there. */ |
|
|
.dropdown-container details[open] .dropdown-arrow { |
|
|
transform: rotate(90deg); |
|
|
} |
|
|
|
|
|
.dropdown-content { |
|
|
margin-top: 15px; |
|
|
padding: 15px; |
|
|
background-color: rgba(22, 26, 37, 0.6); |
|
|
border-left: 2px solid #ff9966; /* Sunset orange */ |
|
|
box-shadow: 0 2px 10px rgba(255, 153, 102, 0.05); |
|
|
} |
|
|
|
|
|
.config-title { |
|
|
color: #a6c8e0; /* Muted sky blue */ |
|
|
font-size: 1rem; |
|
|
margin-bottom: 10px; |
|
|
font-family: 'Playfair Display', serif; |
|
|
text-transform: uppercase; |
|
|
letter-spacing: 1px; |
|
|
} |
|
|
</style> |
|
|
<html lang="en"> |
|
|
<head> |
|
|
<meta charset="UTF-8"> |
|
|
<meta name="viewport" content="width=device-width, initial-scale=1.0"> |
|
|
<title>Painted Fantasy</title> |
|
|
<link href="https://fonts.googleapis.com/css2?family=Crimson+Text:wght@400;600;700&family=Playfair+Display:wght@400;700&display=swap" rel="stylesheet"> |
|
|
</head> |
|
|
<body> |
|
|
|
|
|
<div class="container"> |
|
|
<div class="title-container"> |
|
|
<!-- Glitchy overlay --> |
|
|
<div class="glitchy-overlay"></div> |
|
|
<!-- Main title --> |
|
|
<div class="title-wrapper"> |
|
|
<h1 class="title-main"> |
|
|
<span class="title-prefix">PAINTED FANTASY</span> |
|
|
<span class="lemonade-text">v2</span> |
|
|
</h1> |
|
|
<div class="title-subtitle"> |
|
|
<span class="subtitle-text">MS3.2-24B</span> |
|
|
</div> |
|
|
</div> |
|
|
</div> |
|
|
|
|
|
 |
|
|
|
|
|
<div class="section-container"> |
|
|
<div class="section-header"> |
|
|
<div class="section-indicator"></div> |
|
|
<h2 class="section-title">Overview</h2> |
|
|
</div> |
|
|
<div class="section-content"> |
|
|
<p>This is an uncensored creative model intended to excel at character driven RP / ERP.</p> |
|
|
<p>Version 2 feels quite different from the original, with a heavy focus on reducing repetition across conversations and improving instruction following.</p> |
|
|
<p>Has a pretty unique writing style and sense of creativity (IMO). Pays the price with intermittent brain farts though.</p> |
|
|
</div> |
|
|
</div> |
|
|
|
|
|
<div class="section-container"> |
|
|
<div class="section-header"> |
|
|
<div class="section-indicator"></div> |
|
|
<h2 class="section-title">SillyTavern Settings</h2> |
|
|
</div> |
|
|
<div class="section-content"> |
|
|
<h3 class="subheading">Recommended Roleplay Format</h3> |
|
|
<div class="data-box"> |
|
|
<div class="data-row"> |
|
|
<span class="data-arrow">></span> |
|
|
<span class="data-label">Actions:</span> |
|
|
<span>In plaintext</span> |
|
|
</div> |
|
|
<div class="data-row"> |
|
|
<span class="data-arrow">></span> |
|
|
<span class="data-label">Dialogue:</span> |
|
|
<span>"In quotes"</span> |
|
|
</div> |
|
|
<div class="data-row"> |
|
|
<span class="data-arrow">></span> |
|
|
<span class="data-label">Thoughts:</span> |
|
|
<span>*In asterisks*</span> |
|
|
</div> |
|
|
</div> |
|
|
<h3 class="subheading">Suggested Samplers</h3> |
|
|
<div class="data-box"> |
|
|
<div class="data-row"> |
|
|
<span class="data-arrow">></span> |
|
|
<span class="data-label">Temp:</span> |
|
|
<span>0.5-0.6</span> |
|
|
</div> |
|
|
<div class="data-row"> |
|
|
<span class="data-arrow">></span> |
|
|
<span class="data-label">MinP:</span> |
|
|
<span>0.1</span> |
|
|
</div> |
|
|
<div class="data-row"> |
|
|
<span class="data-arrow">></span> |
|
|
<span class="data-label">TopP:</span> |
|
|
<span>0.95</span> |
|
|
</div> |
|
|
<div class="data-row"> |
|
|
<span class="data-arrow">></span> |
|
|
<span class="data-label">Dry:</span> |
|
|
<span>0.8, 1.75, 4</span> |
|
|
</div> |
|
|
</div> |
|
|
<h3 class="subheading">Instruct</h3> |
|
|
<div class="data-box"> |
|
|
<p style="margin: 0;">Mistral v7 Tekken</p> |
|
|
</div> |
|
|
</div> |
|
|
</div> |
|
|
|
|
|
<div class="section-container"> |
|
|
<div class="section-header"> |
|
|
<div class="section-indicator"></div> |
|
|
<h2 class="section-title">Quantizations</h2> |
|
|
</div> |
|
|
<div class="section-content"> |
|
|
<div style="margin-bottom: 20px;"> |
|
|
<h3 class="subheading">GGUF</h3> |
|
|
<div class="data-box"> |
|
|
<div class="data-row"> |
|
|
<span style="color: #ff9966; display: inline-block; margin-right: 10px;">> </span><a href="https://huggingface.co/mradermacher/MS3.2-PaintedFantasy-v2-24B-GGUF">Static (mradermacher)</a> |
|
|
</div> |
|
|
<div class="data-row"> |
|
|
<span style="color: #ff9966; display: inline-block; margin-right: 10px;">> </span><a href="https://huggingface.co/mradermacher/MS3.2-PaintedFantasy-v2-24B-i1-GGUF">iMatrix (mradermacher)</a> |
|
|
</div> |
|
|
</div> |
|
|
</div> |
|
|
<div> |
|
|
<h3 class="subheading">EXL3</h3> |
|
|
<div class="data-box"> |
|
|
<div class="data-row"> |
|
|
<span style="color: #ff9966; display: inline-block; margin-right: 10px;">> </span><a href="https://huggingface.co/zerofata/MS3.2-PaintedFantasy-v2-24b-exl3-3bpw">3bpw</a> |
|
|
</div> |
|
|
<div class="data-row"> |
|
|
<span style="color: #ff9966; display: inline-block; margin-right: 10px;">> </span><a href="https://huggingface.co/zerofata/MS3.2-PaintedFantasy-v2-24b-exl3-3.5bpw">3.5bpw</a> |
|
|
</div> |
|
|
<div class="data-row"> |
|
|
<span style="color: #ff9966; display: inline-block; margin-right: 10px;">> </span><a href="https://huggingface.co/zerofata/MS3.2-PaintedFantasy-v2-24b-exl3-4bpw">4bpw</a> |
|
|
</div> |
|
|
<div class="data-row"> |
|
|
<span style="color: #ff9966; display: inline-block; margin-right: 10px;">> </span><a href="https://huggingface.co/zerofata/MS3.2-PaintedFantasy-v2-24b-exl3-5bpw">5bpw</a> |
|
|
</div> |
|
|
<div class="data-row"> |
|
|
<span style="color: #ff9966; display: inline-block; margin-right: 10px;">> </span><a href="https://huggingface.co/zerofata/MS3.2-PaintedFantasy-v2-24b-exl3-6bpw">6bpw</a> |
|
|
</div> |
|
|
</div> |
|
|
</div> |
|
|
</div> |
|
|
</div> |
|
|
|
|
|
<div class="section-container"> |
|
|
<div class="section-header"> |
|
|
<div class="section-indicator"></div> |
|
|
<h2 class="section-title">Training Process</h2> |
|
|
</div> |
|
|
<div class="section-content"> |
|
|
<p>Training process: SFT > DPO > KTO</p> |
|
|
<p>SFT with RP/ERP, Stories and in character assistant data.</p> |
|
|
<p>DPO focused on reducing repetition, misgendered characters and slop.</p> |
|
|
<p>KTO focused on further reducing repetition and slop.</p> |
|
|
<div class="dropdown-container"> |
|
|
<details> |
|
|
<summary class="dropdown-summary"> |
|
|
<span class="dropdown-arrow">></span> |
|
|
Axolotl configs |
|
|
</summary> |
|
|
<div class="dropdown-content"> |
|
|
<p>Not optimized for cost / performance efficiency, YMMV.</p> |
|
|
<div class="config-title">SFT 1*H100</div> |
|
|
<pre><code># ==================== |
|
|
# MODEL CONFIGURATION |
|
|
# ==================== |
|
|
base_model: ConicCat/Mistral-Small-3.2-AntiRep-24B |
|
|
model_type: AutoModelForCausalLM |
|
|
tokenizer_type: AutoTokenizer |
|
|
chat_template: mistral_v7_tekken |
|
|
|
|
|
# ==================== |
|
|
# DATASET CONFIGURATION |
|
|
# ==================== |
|
|
datasets: |
|
|
- path: ./dataset.jsonl |
|
|
type: chat_template |
|
|
split: train |
|
|
chat_template_strategy: tokenizer |
|
|
field_messages: messages |
|
|
message_property_mappings: |
|
|
role: role |
|
|
content: content |
|
|
roles: |
|
|
user: ["user"] |
|
|
assistant: ["assistant"] |
|
|
system: ["system"] |
|
|
|
|
|
dataset_prepared_path: |
|
|
train_on_inputs: false # Only train on assistant responses |
|
|
|
|
|
# ==================== |
|
|
# QLORA CONFIGURATION |
|
|
# ==================== |
|
|
adapter: qlora |
|
|
load_in_4bit: true |
|
|
lora_r: 128 |
|
|
lora_alpha: 128 |
|
|
lora_dropout: 0.1 |
|
|
lora_target_linear: true |
|
|
# lora_modules_to_save: # Uncomment only if you added NEW tokens |
|
|
|
|
|
# ==================== |
|
|
# TRAINING PARAMETERS |
|
|
# ==================== |
|
|
num_epochs: 3 |
|
|
micro_batch_size: 8 |
|
|
gradient_accumulation_steps: 1 |
|
|
learning_rate: 1e-5 |
|
|
optimizer: paged_adamw_8bit |
|
|
lr_scheduler: rex |
|
|
warmup_ratio: 0.05 |
|
|
weight_decay: 0.01 |
|
|
max_grad_norm: 1.0 |
|
|
|
|
|
# ==================== |
|
|
# SEQUENCE & PACKING |
|
|
# ==================== |
|
|
sequence_len: 8192 |
|
|
sample_packing: true |
|
|
eval_sample_packing: false |
|
|
pad_to_sequence_len: true |
|
|
|
|
|
# ==================== |
|
|
# HARDWARE OPTIMIZATIONS |
|
|
# ==================== |
|
|
bf16: auto |
|
|
flash_attention: true |
|
|
gradient_checkpointing: true |
|
|
|
|
|
# ==================== |
|
|
# EVALUATION & CHECKPOINTING |
|
|
# ==================== |
|
|
save_strategy: steps |
|
|
save_steps: 20 |
|
|
save_total_limit: 5 # Keep best + last few checkpoints |
|
|
load_best_model_at_end: true |
|
|
metric_for_best_model: eval_loss |
|
|
greater_is_better: false |
|
|
|
|
|
# ==================== |
|
|
# LOGGING & OUTPUT |
|
|
# ==================== |
|
|
output_dir: ./PT-SFT_1 |
|
|
logging_steps: 2 |
|
|
save_safetensors: true |
|
|
|
|
|
# ==================== |
|
|
# WANDB TRACKING |
|
|
# ==================== |
|
|
wandb_project: PF-SFT |
|
|
wandb_entity: your_entity |
|
|
wandb_name: run_name</code></pre> |
|
|
</div> |
|
|
</details> |
|
|
</div> |
|
|
</div> |
|
|
</div> |
|
|
</div> |
|
|
</body> |
|
|
</html> |