---
base_model:
- zerofata/MS3.2-PaintedFantasy-Visage-v3-34B
---
<style>
.container {
--primary-accent: #C0C0C0;
--secondary-accent: #4A9EFF;
--glow-primary: rgba(192, 192, 192, 0.6);
--glow-secondary: rgba(74, 158, 255, 0.6);
--bg-main: #0B0A18;
--bg-container: #110F24;
--bg-card: rgba(20, 18, 40, 0.7);
--text-main: #DCDCDC;
--text-muted: #9E9E9E;
--white: #FFFFFF;
--border-color: #3C3A50;
--font-title: 'Cinzel', serif;
--font-body: 'EB Garamond', serif;
--font-code: 'Courier New', monospace;
font-family: var(--font-body);
color: var(--text-main);
line-height: 1.6;
font-weight: 400;
max-width: 1100px;
margin: 20px auto;
padding: 25px;
background-color: var(--bg-main);
background-image: linear-gradient(rgba(11, 10, 24, 0.95), rgba(11, 10, 24, 0.95)), url('https://www.transparenttextures.com/patterns/stardust.png');
min-height: calc(100vh - 40px);
border-radius: 8px;
box-shadow: 0 0 25px rgba(0,0,0,0.7);
border: 1px solid var(--border-color);
}
.container .title-container {
background: linear-gradient(135deg, rgba(20, 18, 40, 0.8), rgba(30, 28, 50, 0.6));
margin-bottom: 30px;
border: 1px solid var(--border-color);
border-radius: 6px;
padding: 25px;
text-align: center;
position: relative;
box-shadow: 0 5px 15px rgba(0,0,0,0.4);
overflow: hidden;
}
.container .title-main {
color: var(--white);
font-size: 2.5rem;
font-weight: 700;
margin: 0;
letter-spacing: 4px;
display: block;
text-transform: uppercase;
text-shadow: 0 0 4px var(--glow-primary), 0 0 8px var(--glow-primary), 0 0 12px var(--glow-primary);
font-family: var(--font-title);
}
.container .lemonade-text {
color: var(--secondary-accent);
text-shadow: 0 0 8px var(--glow-secondary);
}
.container .title-subtitle {
padding-left: 0;
margin-top: 15px;
}
.container .subtitle-text {
color: var(--text-muted);
font-size: 1.2rem;
font-family: var(--font-body);
font-style: italic;
font-weight: 400;
letter-spacing: 2px;
text-transform: uppercase;
opacity: 0.8;
}
.container img {
max-width: 100%;
border: 2px solid var(--border-color);
margin-bottom: 40px;
box-shadow: 0 5px 15px rgba(0,0,0,0.5);
border-radius: 4px;
}
.container .section-container {
margin-bottom: 25px;
padding-bottom: 25px;
border-bottom: 1px dashed var(--border-color);
}
.container .section-container:last-of-type {
border-bottom: none;
padding-bottom: 0;
margin-bottom: 0;
}
.container .section-header {
display: flex;
align-items: center;
padding: 0 0 15px 0;
}
.container .section-title {
font-family: var(--font-title);
background: linear-gradient(45deg, var(--secondary-accent), var(--primary-accent));
background-clip: text;
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
font-size: 1.4rem;
margin: 0 !important;
padding: 0 0 10px 0 !important;
letter-spacing: 1px;
font-weight: 700;
text-transform: uppercase;
border: none !important;
position: relative;
display: inline-block;
}
.container .section-title::after {
content: '';
position: absolute;
bottom: 0;
left: 0;
width: 100%;
height: 2px;
background-image: linear-gradient(to right, var(--secondary-accent), var(--primary-accent));
box-shadow: 0 0 6px var(--glow-secondary), 0 0 6px var(--glow-primary);
border-radius: 2px;
}
.container .section-content {
padding: 20px 0 0 0;
}
.container .subheading {
color: var(--secondary-accent);
font-size: 1.1rem;
margin-top: 20px;
margin-bottom: 12px;
font-weight: 700;
display: block;
text-transform: uppercase;
letter-spacing: 2px;
font-family: var(--font-title);
border-bottom: 1px solid var(--secondary-accent);
padding-bottom: 6px;
text-shadow: 0 0 4px var(--glow-secondary);
}
.container .data-box {
background-color: var(--bg-card);
padding: 15px;
border: 1px solid var(--border-color);
border-left: 2px solid var(--primary-accent);
margin-bottom: 15px;
box-shadow: inset 0 0 6px rgba(0,0,0,0.4);
border-radius: 4px;
font-size: 1rem;
}
.container .data-row {
display: flex;
align-items: center;
margin-bottom: 6px;
padding: 5px 0;
}
.container .data-row:last-child {
margin-bottom: 0;
}
.container .data-arrow {
color: var(--secondary-accent);
font-weight: bold;
margin-right: 10px;
font-family: var(--font-code);
font-size: 1rem;
}
.container .data-label {
color: var(--white);
font-weight: 600;
font-family: var(--font-body);
margin-right: 8px;
min-width: 80px;
}
.container a {
color: var(--primary-accent);
text-decoration: none;
font-weight: 600;
transition: all .2s;
}
.container .data-row a {
border-bottom: 1px dotted var(--primary-accent);
}
.container a:hover {
text-decoration: none;
color: var(--white);
text-shadow: 0 0 5px var(--glow-primary);
}
.container .data-row a:hover {
border-bottom-style: solid;
}
.container .dropdown-container {
margin-top: 20px;
}
.container .dropdown-summary {
cursor: pointer;
padding: 10px 0;
color: var(--text-muted);
font-size: 1.1rem;
font-weight: 700;
text-transform: none;
font-family: var(--font-title);
letter-spacing: 1px;
list-style: none;
transition: color 0.2s ease;
}
.container .dropdown-summary:hover {
color: var(--primary-accent);
}
.container .dropdown-arrow {
color: var(--secondary-accent);
margin-right: 10px;
transition: transform 0.2s ease;
}
.container .dropdown-content {
margin-top: 15px;
padding: 20px;
background-color: var(--bg-card);
border: 1px solid var(--border-color);
border-radius: 4px;
}
.container .config-title {
color: var(--text-muted);
font-size: 1rem;
margin-bottom: 10px;
font-family: var(--font-body);
text-transform: uppercase;
letter-spacing: 1px;
font-weight: 700;
}
.container pre {
background-color: #1c1c1c;
padding: 15px;
border: 1px solid var(--border-color);
white-space: pre-wrap;
word-wrap: break-word;
color: #c5c8c6;
border-radius: 4px;
box-shadow: inset 0 0 5px rgba(0,0,0,0.5);
}
.container pre code {
background: none;
color: inherit;
padding: 0;
border-radius: 0;
}
.container code {
font-family: var(--font-code);
color: var(--primary-accent);
background: var(--border-color);
padding: 2px 5px;
border-radius: 4px;
}
</style>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Painted Fantasy</title>
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=Cinzel:wght@400;700&family=MedievalSharp&family=EB+Garamond:ital,wght@0,400;0,500;1,400&display=swap" rel="stylesheet">
</head>
<body>
<div class="container">
<div class="title-container">
<div class="glitchy-overlay"></div>
<div class="title-wrapper">
<h1 class="title-main">
<span class="title-prefix">PAINTED FANTASY</span>
<span class="lemonade-text">VISAGE v3</span>
</h1>
<div class="title-subtitle">
<span class="subtitle-text">Mistral Small 3.2 Upscaled 34B</span>
</div>
</div>
</div>
![image/png](https://cdn-uploads.huggingface.co/production/uploads/65b19c6c638328850e12d38c/CroIkC3MXC5gIghNjkEVg.png)
<div class="section-container">
<div class="section-header">
<div class="section-indicator"></div>
<h2 class="section-title">Overview</h2>
</div>
<div class="section-content">
<p>No layer left behind edition.</p>
<p>Upscale redone with the missing final layer included. The original upscales were always missing a layer, but I never troubleshot to identify <em>what</em> layer was missing. Turns out it was the final layer. That's kind of an important one.</p>
<p>This model is an uncensored, creative writing and RP model. Compared to the older version, it is smarter and I think has a bit less repetition. The old V2 version, though, is slightly more creative due to its instability.</p>
</div>
</div>
<div class="section-container">
<div class="section-header">
<div class="section-indicator"></div>
<h2 class="section-title">SillyTavern Settings</h2>
</div>
<div class="section-content">
<h3 class="subheading">Recommended Roleplay Format</h3>
<div class="data-box">
<div class="data-row">
<span class="data-arrow">></span>
<span class="data-label">Actions:</span>
<span>In plaintext</span>
</div>
<div class="data-row">
<span class="data-arrow">></span>
<span class="data-label">Dialogue:</span>
<span>"In quotes"</span>
</div>
<div class="data-row">
<span class="data-arrow">></span>
<span class="data-label">Thoughts:</span>
<span>*In asterisks*</span>
</div>
</div>
<h3 class="subheading">Recommended Samplers</h3>
<div class="data-box">
<div class="data-row">
<span class="data-arrow">></span>
<span class="data-label">Temp:</span>
<span>0.7-0.8</span>
</div>
<div class="data-row">
<span class="data-arrow">></span>
<span class="data-label">MinP:</span>
<span>0.05 - 0.1</span>
</div>
<div class="data-row">
<span class="data-arrow">></span>
<span class="data-label">TopP:</span>
<span>0.95</span>
</div>
<div class="data-row">
<span class="data-arrow">></span>
<span class="data-label">Dry:</span>
<span>0.8, 1.75, 4</span>
</div>
</div>
<h3 class="subheading">Instruct</h3>
<div class="data-box">
<p style="margin: 0;">Mistral v7 Tekken</p>
</div>
</div>
</div>
<div class="section-container">
<div class="section-header">
<div class="section-indicator"></div>
<h2 class="section-title">Quantizations</h2>
</div>
<div class="section-content">
<div style="margin-bottom: 20px;">
<h3 class="subheading">GGUF</h3>
<div class="data-box">
<div class="data-row">
<span class="data-arrow">></span>
<a href="https://huggingface.co/bartowski/zerofata_MS3.2-PaintedFantasy-Visage-v3-34B-GGUF">iMatrix (bartowski)</a>
</div>
</div>
</div>
<div>
<h3 class="subheading">EXL3</h3>
<div class="data-box">
<div class="data-row">
<span class="data-arrow">></span>
<a href="https://huggingface.co/zerofata/MS3.2-PaintedFantasy-Visage-v3-34B-exl3-3bpw">3bpw</a>
</div>
<div class="data-row">
<span class="data-arrow">></span>
<a href="https://huggingface.co/zerofata/MS3.2-PaintedFantasy-Visage-v3-34B-exl3-4bpw">4bpw</a>
</div>
<div class="data-row">
<span class="data-arrow">></span>
<a href="https://huggingface.co/zerofata/MS3.2-PaintedFantasy-Visage-v3-34B-exl3-4.25bpw">4.25bpw</a>
</div>
<div class="data-row">
<span class="data-arrow">></span>
<a href="https://huggingface.co/zerofata/MS3.2-PaintedFantasy-Visage-v3-34B-exl3-5bpw">5bpw</a>
</div>
<div class="data-row">
<span class="data-arrow">></span>
<a href="https://huggingface.co/zerofata/MS3.2-PaintedFantasy-Visage-v3-34B-exl3-6bpw">6bpw</a>
</div>
</div>
</div>
</div>
</div>
<div class="section-container">
<div class="section-header">
<div class="section-indicator"></div>
<h2 class="section-title">Creation Process</h2>
</div>
<div class="section-content">
<p>Creation Process: Upscale > CPT > SFT > DPO</p>
<p>Pretrained on approx 300MB of light novel and FineWeb-2 corpus.</p>
<p>SFT on approx 8 million tokens, SFW / NSFW RP, stories and creative instruct data.</p>
<p>DPO on a high quality RP / NSFW dataset with a focus on improving instruction following, reducing repetition and fixing common model mistakes.</p>
<div class="dropdown-container">
<details>
<summary class="dropdown-summary">
<span class="dropdown-arrow">></span>
Mergekit configs
</summary>
<div class="dropdown-content">
<p>Merge configurations used during the model creation process.</p>
<div class="config-title">Upscale (Passthrough)</div>
<pre><code>base_model: ConicCat/Mistral-Small-3.2-AntiRep-24B
merge_method: passthrough
dtype: bfloat16
slices:
- sources:
- model: ConicCat/Mistral-Small-3.2-AntiRep-24B
layer_range: [0, 29]
- sources:
- model: ConicCat/Mistral-Small-3.2-AntiRep-24B
layer_range: [10, 40]</code></pre>
</div>
</details>
</div>
<div class="dropdown-container">
<details>
<summary class="dropdown-summary">
<span class="dropdown-arrow">></span>
Axolotl configs
</summary>
<div class="dropdown-content">
<p>Not optimized for cost / performance efficiency, YMMV.</p>
<div class="config-title">Pretrain 4*H100</div>
<pre><code>&#35; ====================
&#35; MODEL CONFIGURATION
&#35; ====================
base_model: ../mergekit/pf_v3_upscale
model_type: MistralForCausalLM
tokenizer_type: AutoTokenizer
chat_template: mistral_v7_tekken
&#35; ====================
&#35; DATASET CONFIGURATION
&#35; ====================
datasets:
- path: ./data/pretrain_dataset_v5_stripped.jsonl
type: completion
<br>
dataset_prepared_path:
train_on_inputs: false &#35; Only train on assistant responses
<br>
&#35; ====================
&#35; QLORA CONFIGURATION
&#35; ====================
adapter: qlora
load_in_4bit: true
lora_r: 32
lora_alpha: 64
lora_dropout: 0.05
lora_target_linear: true
&#35; lora_modules_to_save: &#35; Uncomment only if you added NEW tokens
<br>
&#35; ====================
&#35; TRAINING PARAMETERS
&#35; ====================
num_epochs: 1
micro_batch_size: 4
gradient_accumulation_steps: 1
learning_rate: 4e-5
optimizer: paged_adamw_8bit
lr_scheduler: rex
warmup_ratio: 0.05
weight_decay: 0.01
max_grad_norm: 1.0
<br>
&#35; ====================
&#35; SEQUENCE &amp; PACKING
&#35; ====================
sequence_len: 12288
sample_packing: true
eval_sample_packing: false
pad_to_sequence_len: true
<br>
&#35; ====================
&#35; HARDWARE OPTIMIZATIONS
&#35; ====================
bf16: auto
flash_attention: true
gradient_checkpointing: offload
deepspeed: deepspeed_configs/zero1.json
<br>
plugins:
- axolotl.integrations.liger.LigerPlugin
- axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin
cut_cross_entropy: true
liger_rope: true
liger_rms_norm: true
liger_layer_norm: true
liger_glu_activation: true
liger_cross_entropy: false &#35; Cut Cross Entropy overrides this
liger_fused_linear_cross_entropy: false &#35; Cut Cross Entropy overrides this
<br>
&#35; ====================
&#35; EVALUATION &amp; CHECKPOINTING
&#35; ====================
save_strategy: steps
save_steps: 40
save_total_limit: 5 &#35; Keep best + last few checkpoints
load_best_model_at_end: true
greater_is_better: false
<br>
&#35; ====================
&#35; LOGGING &amp; OUTPUT
&#35; ====================
output_dir: ./Visage-V3-PT-1
logging_steps: 2
save_safetensors: true
<br>
&#35; ====================
&#35; WANDB TRACKING
&#35; ====================
wandb_project: Visage-V3-PT
# wandb_entity: your_entity
wandb_name: Visage-V3-PT-1</code></pre>
<div class="config-title">SFT 4*H100</div>
<pre><code># ====================
# MODEL CONFIGURATION
# ====================
base_model: ./Visage-V3-PT-1/merged
model_type: MistralForCausalLM
tokenizer_type: AutoTokenizer
chat_template: mistral_v7_tekken
<br>
# ====================
# DATASET CONFIGURATION
# ====================
datasets:
- path: ./data/dataset.jsonl
type: chat_template
split: train
chat_template_strategy: tokenizer
field_messages: messages
message_property_mappings:
role: role
content: content
roles:
user: ["user"]
assistant: ["assistant"]
system: ["system"]
<br>
dataset_prepared_path:
train_on_inputs: false # Only train on assistant responses
<br>
# ====================
# QLORA CONFIGURATION
# ====================
adapter: qlora
load_in_4bit: true
lora_r: 128
lora_alpha: 128
lora_dropout: 0.1
lora_target_linear: true
# lora_modules_to_save: # Uncomment only if you added NEW tokens
<br>
# ====================
# TRAINING PARAMETERS
# ====================
num_epochs: 3
micro_batch_size: 4
gradient_accumulation_steps: 1
learning_rate: 1e-5
optimizer: paged_adamw_8bit
lr_scheduler: rex
warmup_ratio: 0.05
weight_decay: 0.01
max_grad_norm: 1.0
<br>
# ====================
# SEQUENCE & PACKING
# ====================
sequence_len: 8192
sample_packing: true
pad_to_sequence_len: true
<br>
# ====================
# HARDWARE OPTIMIZATIONS
# ====================
bf16: auto
flash_attention: true
gradient_checkpointing: offload
deepspeed: deepspeed_configs/zero1.json
<br>
plugins:
- axolotl.integrations.liger.LigerPlugin
- axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin
cut_cross_entropy: true
liger_rope: true
liger_rms_norm: true
liger_layer_norm: true
liger_glu_activation: true
liger_cross_entropy: false # Cut Cross Entropy overrides this
liger_fused_linear_cross_entropy: false # Cut Cross Entropy overrides this
<br>
# ====================
# EVALUATION & CHECKPOINTING
# ====================
save_strategy: steps
save_steps: 20
save_total_limit: 5 # Keep best + last few checkpoints
load_best_model_at_end: true
metric_for_best_model: eval_loss
greater_is_better: false
<br>
# ====================
# LOGGING & OUTPUT
# ====================
output_dir: ./Visage-V3-PT-1-SFT-2
logging_steps: 1
save_safetensors: true
<br>
# ====================
# WANDB TRACKING
# ====================
wandb_project: Visage-V3-SFT
# wandb_entity: your_entity
wandb_name: Visage-V3-PT-1-SFT-2</code></pre>
<div class="config-title">DPO 2*H200</div>
<pre><code># ====================
# MODEL CONFIGURATION
# ====================
base_model: ./Visage-V3-PT-1-SFT-2/merged
model_type: MistralForCausalLM
tokenizer_type: AutoTokenizer
chat_template: mistral_v7_tekken
<br>
# ====================
# RL/DPO CONFIGURATION
# ====================
rl: dpo
rl_beta: 0.085
<br>
# ====================
# DATASET CONFIGURATION
# ====================
datasets:
- path: ./data/handcrafted_dataset_mistral_rep.jsonl
type: chat_template.default
field_messages: messages
field_chosen: chosen
field_rejected: rejected
message_property_mappings:
role: role
content: content
roles:
system: ["system"]
user: ["user"]
assistant: ["assistant"]
- path: ./data/approved_automated_l3_dataset.jsonl
type: chat_template.default
field_messages: messages
field_chosen: chosen
field_rejected: rejected
message_property_mappings:
role: role
content: content
roles:
system: ["system"]
user: ["user"]
assistant: ["assistant"]
dataset_prepared_path:
train_on_inputs: false # Only train on assistant responses
<br>
# ====================
# QLORA CONFIGURATION
# ====================
adapter: lora
load_in_8bit: true
lora_r: 16
lora_alpha: 32
lora_dropout: 0.1
lora_target_linear: true
# lora_modules_to_save: # Uncomment only if you added NEW tokens
<br>
# ====================
# TRAINING PARAMETERS
# ====================
num_epochs: 1
micro_batch_size: 2
gradient_accumulation_steps: 4
learning_rate: 2e-6
optimizer: adamw_torch_fused
lr_scheduler: cosine
warmup_steps: 5
weight_decay: 0.01
max_grad_norm: 1.0
<br>
# ====================
# SEQUENCE CONFIGURATION
# ====================
sequence_len: 8192
pad_to_sequence_len: true
<br>
# ====================
# HARDWARE OPTIMIZATIONS
# ====================
bf16: auto
tf32: false
flash_attention: true
gradient_checkpointing: offload
<br>
plugins:
- axolotl.integrations.liger.LigerPlugin
- axolotl.integrations.cut_cross_entropy.CutCrossEntropyPlugin
cut_cross_entropy: true
liger_rope: true
liger_rms_norm: true
liger_layer_norm: true
liger_glu_activation: true
liger_cross_entropy: false # Cut Cross Entropy overrides this
liger_fused_linear_cross_entropy: false # Cut Cross Entropy overrides this
deepspeed: deepspeed_configs/zero1.json
<br>
# ====================
# CHECKPOINTING
# ====================
save_steps: 10
save_total_limit: 10
load_best_model_at_end: true
metric_for_best_model: eval_loss
greater_is_better: false
<br>
# ====================
# LOGGING & OUTPUT
# ====================
output_dir: ./Visage-V3-PT-1-SFT-2-DPO-2
logging_steps: 1
save_safetensors: true
<br>
# ====================
# WANDB TRACKING
# ====================
wandb_project: Visage-V3-DPO
# wandb_entity: your_entity
wandb_name: Visage-V3-PT-1-SFT-2-DPO-2</code></pre>
</div>
</details>
</div>
</div>
</div>
</div>
</body>
</html>