Update app.py
Browse files
app.py
CHANGED
|
@@ -1,20 +1,20 @@
|
|
| 1 |
"""
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
โ
|
| 6 |
-
โ
|
| 7 |
-
โ
|
|
|
|
|
|
|
|
|
|
| 8 |
โ
Model Structure Pre-Analysis
|
| 9 |
โ
Qwen3 Model Support
|
| 10 |
-
โ
Zero-shot Conversion (No Dataset Required)
|
| 11 |
-
โ
Optional Fine-tuning (Dataset-based)
|
| 12 |
โ
GQA Support
|
| 13 |
-
โ
HuggingFace Hub Integration
|
| 14 |
-
โ
Comprehensive Evaluation
|
| 15 |
-
โ
Pre-upload Verification
|
| 16 |
|
| 17 |
-
VIDraft AI Research Lab - Complete Integrated Version
|
|
|
|
| 18 |
"""
|
| 19 |
|
| 20 |
import gradio as gr
|
|
@@ -31,13 +31,12 @@ import plotly.graph_objects as go
|
|
| 31 |
import plotly.express as px
|
| 32 |
import pandas as pd
|
| 33 |
from typing import Dict, List, Any, Tuple, Optional
|
| 34 |
-
import chromadb
|
| 35 |
-
from chromadb.config import Settings
|
| 36 |
from transformers import (
|
| 37 |
AutoModel, AutoTokenizer, AutoConfig, AutoModelForCausalLM,
|
| 38 |
-
get_cosine_schedule_with_warmup, TrainingArguments, Trainer
|
|
|
|
| 39 |
)
|
| 40 |
-
from datasets import load_dataset
|
| 41 |
from torch.utils.data import Dataset, DataLoader
|
| 42 |
from accelerate import Accelerator
|
| 43 |
from tqdm import tqdm
|
|
@@ -53,7 +52,6 @@ from huggingface_hub import HfApi, create_repo
|
|
| 53 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
| 54 |
STORAGE_PATH = "/data"
|
| 55 |
DB_PATH = f"{STORAGE_PATH}/phoenix_experiments.db"
|
| 56 |
-
VECTOR_DB_PATH = f"{STORAGE_PATH}/vector_store"
|
| 57 |
MODELS_PATH = f"{STORAGE_PATH}/phoenix_models"
|
| 58 |
DEFAULT_MODEL = "Qwen/Qwen3-0.6B"
|
| 59 |
|
|
@@ -61,10 +59,9 @@ DEFAULT_MODEL = "Qwen/Qwen3-0.6B"
|
|
| 61 |
HF_TOKEN = os.getenv("HF_TOKEN")
|
| 62 |
|
| 63 |
Path(STORAGE_PATH).mkdir(parents=True, exist_ok=True)
|
| 64 |
-
Path(VECTOR_DB_PATH).mkdir(parents=True, exist_ok=True)
|
| 65 |
Path(MODELS_PATH).mkdir(parents=True, exist_ok=True)
|
| 66 |
|
| 67 |
-
print(f"
|
| 68 |
print(f"๐พ Storage: {STORAGE_PATH}")
|
| 69 |
print(f"๐ฏ Default Base Model: {DEFAULT_MODEL}")
|
| 70 |
if HF_TOKEN:
|
|
@@ -77,10 +74,7 @@ else:
|
|
| 77 |
# =====================================================
|
| 78 |
|
| 79 |
def analyze_model_structure(model_url: str) -> Dict[str, Any]:
|
| 80 |
-
"""
|
| 81 |
-
๐ ๋ชจ๋ธ ๊ตฌ์กฐ ์ฌ์ ๋ถ์
|
| 82 |
-
๋ณํ ์ ๋ชจ๋ธ์ ๋ ์ด์ด ๊ตฌ์กฐ๋ฅผ ํ์
ํฉ๋๋ค.
|
| 83 |
-
"""
|
| 84 |
print("\n" + "="*80)
|
| 85 |
print("๐ MODEL STRUCTURE ANALYSIS")
|
| 86 |
print("="*80)
|
|
@@ -109,8 +103,6 @@ def analyze_model_structure(model_url: str) -> Dict[str, Any]:
|
|
| 109 |
'num_attention_heads': config.num_attention_heads if hasattr(config, 'num_attention_heads') else 0,
|
| 110 |
'num_hidden_layers': config.num_hidden_layers if hasattr(config, 'num_hidden_layers') else 0,
|
| 111 |
'num_key_value_heads': config.num_key_value_heads if hasattr(config, 'num_key_value_heads') else None,
|
| 112 |
-
'layer_structure': None,
|
| 113 |
-
'attention_type': 'unknown',
|
| 114 |
'total_layers': 0,
|
| 115 |
'has_self_attn': False,
|
| 116 |
'layer_path': None,
|
|
@@ -125,7 +117,6 @@ def analyze_model_structure(model_url: str) -> Dict[str, Any]:
|
|
| 125 |
('model.layers', lambda m: m.model.layers if hasattr(m, 'model') and hasattr(m.model, 'layers') else None),
|
| 126 |
('transformer.h', lambda m: m.transformer.h if hasattr(m, 'transformer') and hasattr(m.transformer, 'h') else None),
|
| 127 |
('layers', lambda m: m.layers if hasattr(m, 'layers') else None),
|
| 128 |
-
('model.decoder.layers', lambda m: m.model.decoder.layers if hasattr(m, 'model') and hasattr(m.model, 'decoder') and hasattr(m.model.decoder, 'layers') else None),
|
| 129 |
]
|
| 130 |
|
| 131 |
for path_name, path_fn in possible_paths:
|
|
@@ -137,7 +128,7 @@ def analyze_model_structure(model_url: str) -> Dict[str, Any]:
|
|
| 137 |
break
|
| 138 |
|
| 139 |
if layers is None:
|
| 140 |
-
print(f" โ No layers found!
|
| 141 |
analysis['error'] = 'No layers found'
|
| 142 |
return analysis
|
| 143 |
|
|
@@ -155,18 +146,13 @@ def analyze_model_structure(model_url: str) -> Dict[str, Any]:
|
|
| 155 |
attn = first_layer.self_attn
|
| 156 |
|
| 157 |
print(f" โ
Has self_attn")
|
| 158 |
-
print(f" Attention class: {attn.__class__.__name__}")
|
| 159 |
-
|
| 160 |
-
analysis['attention_type'] = attn.__class__.__name__
|
| 161 |
|
| 162 |
if hasattr(attn, 'q_proj'):
|
| 163 |
q_shape = attn.q_proj.weight.shape
|
| 164 |
k_shape = attn.k_proj.weight.shape
|
| 165 |
-
v_shape = attn.v_proj.weight.shape
|
| 166 |
|
| 167 |
print(f" Q projection: {q_shape}")
|
| 168 |
print(f" K projection: {k_shape}")
|
| 169 |
-
print(f" V projection: {v_shape}")
|
| 170 |
|
| 171 |
if hasattr(config, 'num_attention_heads') and config.num_attention_heads > 0:
|
| 172 |
head_dim = q_shape[0] // config.num_attention_heads
|
|
@@ -174,43 +160,15 @@ def analyze_model_structure(model_url: str) -> Dict[str, Any]:
|
|
| 174 |
print(f" Calculated head_dim: {head_dim}")
|
| 175 |
|
| 176 |
if k_shape[0] != q_shape[0]:
|
| 177 |
-
print(f" โ
GQA detected!
|
| 178 |
analysis['gqa_detected'] = True
|
| 179 |
-
|
| 180 |
-
if hasattr(config, 'num_key_value_heads') and config.num_key_value_heads > 0:
|
| 181 |
-
kv_head_dim = k_shape[0] // config.num_key_value_heads
|
| 182 |
-
analysis['kv_head_dim'] = kv_head_dim
|
| 183 |
-
print(f" Calculated kv_head_dim: {kv_head_dim}")
|
| 184 |
else:
|
| 185 |
-
print(f" Standard MHA (K/V heads == Q heads)")
|
| 186 |
analysis['gqa_detected'] = False
|
| 187 |
|
| 188 |
analysis['q_dim'] = q_shape[0]
|
| 189 |
analysis['k_dim'] = k_shape[0]
|
| 190 |
-
analysis['v_dim'] = v_shape[0]
|
| 191 |
-
analysis['o_in_dim'] = attn.o_proj.weight.shape[1] if hasattr(attn, 'o_proj') else None
|
| 192 |
-
else:
|
| 193 |
-
print(f" โ ๏ธ No self_attn found in layer")
|
| 194 |
-
analysis['has_self_attn'] = False
|
| 195 |
|
| 196 |
-
print(f"\n{'='*80}")
|
| 197 |
-
print(f"๐ STRUCTURE ANALYSIS COMPLETE")
|
| 198 |
-
print(f"{'='*80}")
|
| 199 |
-
print(f"Model Type: {analysis['model_type']}")
|
| 200 |
-
print(f"Architecture: {analysis['architectures']}")
|
| 201 |
-
print(f"Total Layers: {analysis['total_layers']}")
|
| 202 |
-
print(f"Layer Path: {analysis['layer_path']}")
|
| 203 |
-
print(f"Has self_attn: {analysis['has_self_attn']}")
|
| 204 |
-
print(f"Attention Type: {analysis['attention_type']}")
|
| 205 |
-
|
| 206 |
-
if analysis.get('gqa_detected'):
|
| 207 |
-
print(f"โ
GQA Support: YES")
|
| 208 |
-
print(f" Q dim: {analysis.get('q_dim')}")
|
| 209 |
-
print(f" K dim: {analysis.get('k_dim')}")
|
| 210 |
-
else:
|
| 211 |
-
print(f"Standard MHA")
|
| 212 |
-
|
| 213 |
-
print(f"{'='*80}\n")
|
| 214 |
|
| 215 |
del model
|
| 216 |
torch.cuda.empty_cache()
|
|
@@ -226,7 +184,6 @@ def analyze_model_structure(model_url: str) -> Dict[str, Any]:
|
|
| 226 |
return {
|
| 227 |
'model_url': model_url,
|
| 228 |
'error': str(e),
|
| 229 |
-
'traceback': error_msg,
|
| 230 |
'total_layers': 0,
|
| 231 |
}
|
| 232 |
|
|
@@ -246,7 +203,6 @@ class MultiScaleRetention(nn.Module):
|
|
| 246 |
self.hidden_size = config.hidden_size
|
| 247 |
self.num_heads = config.num_attention_heads
|
| 248 |
|
| 249 |
-
# โ
FIX: head_dim์ config์์ ๊ฐ์ ธ์ค๊ธฐ
|
| 250 |
if hasattr(config, 'head_dim'):
|
| 251 |
self.head_dim = config.head_dim
|
| 252 |
else:
|
|
@@ -263,9 +219,6 @@ class MultiScaleRetention(nn.Module):
|
|
| 263 |
self.q_dim = self.num_heads * self.head_dim
|
| 264 |
self.kv_dim = self.num_key_value_heads * self.kv_head_dim
|
| 265 |
|
| 266 |
-
self.register_buffer('_internal_state', None, persistent=False)
|
| 267 |
-
self.register_buffer('_state_initialized', torch.tensor(False), persistent=False)
|
| 268 |
-
|
| 269 |
self.q_proj = nn.Linear(self.hidden_size, self.q_dim, bias=False)
|
| 270 |
self.k_proj = nn.Linear(self.hidden_size, self.kv_dim, bias=False)
|
| 271 |
self.v_proj = nn.Linear(self.hidden_size, self.kv_dim, bias=False)
|
|
@@ -289,11 +242,6 @@ class MultiScaleRetention(nn.Module):
|
|
| 289 |
batch, num_key_value_heads, n_rep, slen, head_dim
|
| 290 |
)
|
| 291 |
return hidden_states.reshape(batch, num_key_value_heads * n_rep, slen, head_dim)
|
| 292 |
-
|
| 293 |
-
def reset_state(self):
|
| 294 |
-
"""Reset internal state"""
|
| 295 |
-
self._internal_state = None
|
| 296 |
-
self._state_initialized = torch.tensor(False)
|
| 297 |
|
| 298 |
def forward(
|
| 299 |
self,
|
|
@@ -310,18 +258,12 @@ class MultiScaleRetention(nn.Module):
|
|
| 310 |
"""O(n) Retention with GQA support"""
|
| 311 |
batch_size, seq_len, _ = hidden_states.shape
|
| 312 |
|
| 313 |
-
if past_key_values is not None:
|
| 314 |
-
past_key_value = past_key_values
|
| 315 |
-
|
| 316 |
target_device = hidden_states.device
|
| 317 |
target_dtype = hidden_states.dtype
|
| 318 |
|
|
|
|
| 319 |
if self.q_proj.weight.device != target_device or self.q_proj.weight.dtype != target_dtype:
|
| 320 |
-
self.
|
| 321 |
-
self.k_proj = self.k_proj.to(device=target_device, dtype=target_dtype)
|
| 322 |
-
self.v_proj = self.v_proj.to(device=target_device, dtype=target_dtype)
|
| 323 |
-
self.o_proj = self.o_proj.to(device=target_device, dtype=target_dtype)
|
| 324 |
-
self.group_norm = self.group_norm.to(device=target_device, dtype=target_dtype)
|
| 325 |
|
| 326 |
query_states = self.q_proj(hidden_states)
|
| 327 |
key_states = self.k_proj(hidden_states)
|
|
@@ -342,24 +284,17 @@ class MultiScaleRetention(nn.Module):
|
|
| 342 |
key_states = self._repeat_kv(key_states, self.num_key_value_groups)
|
| 343 |
value_states = self._repeat_kv(value_states, self.num_key_value_groups)
|
| 344 |
|
| 345 |
-
|
| 346 |
-
|
| 347 |
-
query_states, key_states, value_states, past_state
|
| 348 |
)
|
| 349 |
|
| 350 |
-
if use_cache:
|
| 351 |
-
self._internal_state = new_state.detach()
|
| 352 |
-
self._state_initialized = torch.tensor(True)
|
| 353 |
-
|
| 354 |
retention_states = retention_states.transpose(1, 2).contiguous()
|
| 355 |
retention_states = retention_states.reshape(
|
| 356 |
batch_size, seq_len, self.q_dim
|
| 357 |
)
|
| 358 |
|
| 359 |
-
if
|
| 360 |
-
self.group_norm = self.group_norm.to(retention_states.device, dtype=retention_states.dtype)
|
| 361 |
-
elif next(self.group_norm.parameters()).dtype != retention_states.dtype:
|
| 362 |
-
self.group_norm = self.group_norm.to(dtype=retention_states.dtype)
|
| 363 |
|
| 364 |
retention_states = self.group_norm(
|
| 365 |
retention_states.transpose(1, 2)
|
|
@@ -376,19 +311,15 @@ class MultiScaleRetention(nn.Module):
|
|
| 376 |
queries: torch.Tensor,
|
| 377 |
keys: torch.Tensor,
|
| 378 |
values: torch.Tensor,
|
| 379 |
-
past_state: Optional[torch.Tensor] = None
|
| 380 |
):
|
| 381 |
"""O(n) Retention computation"""
|
| 382 |
batch_size, num_heads, seq_len, head_dim = queries.shape
|
| 383 |
|
| 384 |
-
|
| 385 |
-
|
| 386 |
-
|
| 387 |
-
|
| 388 |
-
|
| 389 |
-
dtype=queries.dtype,
|
| 390 |
-
device=queries.device
|
| 391 |
-
) + 1e-6
|
| 392 |
|
| 393 |
outputs = []
|
| 394 |
|
|
@@ -413,7 +344,7 @@ class MultiScaleRetention(nn.Module):
|
|
| 413 |
|
| 414 |
output = torch.stack(outputs, dim=2)
|
| 415 |
|
| 416 |
-
return output
|
| 417 |
|
| 418 |
|
| 419 |
class HierarchicalRetention(nn.Module):
|
|
@@ -436,15 +367,6 @@ class HierarchicalRetention(nn.Module):
|
|
| 436 |
self.long_decay = 0.95
|
| 437 |
|
| 438 |
self.norm = nn.LayerNorm(hidden_size)
|
| 439 |
-
|
| 440 |
-
if next(self.base_retention.parameters()).is_cuda:
|
| 441 |
-
device = next(self.base_retention.parameters()).device
|
| 442 |
-
dtype = next(self.base_retention.parameters()).dtype
|
| 443 |
-
self.short_proj = self.short_proj.to(device, dtype=dtype)
|
| 444 |
-
self.medium_proj = self.medium_proj.to(device, dtype=dtype)
|
| 445 |
-
self.long_proj = self.long_proj.to(device, dtype=dtype)
|
| 446 |
-
self.fusion = self.fusion.to(device, dtype=dtype)
|
| 447 |
-
self.norm = self.norm.to(device, dtype=dtype)
|
| 448 |
|
| 449 |
def forward(
|
| 450 |
self,
|
|
@@ -461,21 +383,12 @@ class HierarchicalRetention(nn.Module):
|
|
| 461 |
"""Hierarchical forward pass"""
|
| 462 |
batch_size, seq_len, hidden_size = hidden_states.shape
|
| 463 |
|
| 464 |
-
if past_key_values is not None:
|
| 465 |
-
past_key_value = past_key_values
|
| 466 |
-
|
| 467 |
target_device = hidden_states.device
|
| 468 |
target_dtype = hidden_states.dtype
|
| 469 |
|
| 470 |
-
|
| 471 |
-
|
| 472 |
-
|
| 473 |
-
if current_device != target_device or current_dtype != target_dtype:
|
| 474 |
-
self.short_proj = self.short_proj.to(device=target_device, dtype=target_dtype)
|
| 475 |
-
self.medium_proj = self.medium_proj.to(device=target_device, dtype=target_dtype)
|
| 476 |
-
self.long_proj = self.long_proj.to(device=target_device, dtype=target_dtype)
|
| 477 |
-
self.fusion = self.fusion.to(device=target_device, dtype=target_dtype)
|
| 478 |
-
self.norm = self.norm.to(device=target_device, dtype=target_dtype)
|
| 479 |
|
| 480 |
base_result = self.base_retention(
|
| 481 |
hidden_states, attention_mask, position_ids,
|
|
@@ -519,11 +432,8 @@ class HierarchicalRetention(nn.Module):
|
|
| 519 |
# =====================================================
|
| 520 |
|
| 521 |
def replace_attention_with_retention(model, use_hierarchical=True, structure_info=None):
|
| 522 |
-
"""
|
| 523 |
-
|
| 524 |
-
structure_info๋ฅผ ํ์ฉํ์ฌ ๋ ์ ํํ ๋ณํ ์ํ
|
| 525 |
-
"""
|
| 526 |
-
print("๐ Starting Attention โ Retention conversion (GQA support)...")
|
| 527 |
|
| 528 |
replaced_count = 0
|
| 529 |
total_layers = 0
|
|
@@ -541,21 +451,11 @@ def replace_attention_with_retention(model, use_hierarchical=True, structure_inf
|
|
| 541 |
elif layer_path == 'transformer.h':
|
| 542 |
if hasattr(model, 'transformer') and hasattr(model.transformer, 'h'):
|
| 543 |
layers = model.transformer.h
|
| 544 |
-
elif layer_path == 'layers':
|
| 545 |
-
if hasattr(model, 'layers'):
|
| 546 |
-
layers = model.layers
|
| 547 |
-
elif layer_path == 'model.decoder.layers':
|
| 548 |
-
if hasattr(model, 'model') and hasattr(model.model, 'decoder') and hasattr(model.model.decoder, 'layers'):
|
| 549 |
-
layers = model.model.decoder.layers
|
| 550 |
|
| 551 |
if layers is None:
|
| 552 |
-
print(f" Auto-detecting layer structure...")
|
| 553 |
-
|
| 554 |
possible_paths = [
|
| 555 |
('model.layers', lambda m: m.model.layers if hasattr(m, 'model') and hasattr(m.model, 'layers') else None),
|
| 556 |
('transformer.h', lambda m: m.transformer.h if hasattr(m, 'transformer') and hasattr(m.transformer, 'h') else None),
|
| 557 |
-
('layers', lambda m: m.layers if hasattr(m, 'layers') else None),
|
| 558 |
-
('model.decoder.layers', lambda m: m.model.decoder.layers if hasattr(m, 'model') and hasattr(m.model, 'decoder') and hasattr(m.model.decoder, 'layers') else None),
|
| 559 |
]
|
| 560 |
|
| 561 |
for path_name, path_fn in possible_paths:
|
|
@@ -567,42 +467,14 @@ def replace_attention_with_retention(model, use_hierarchical=True, structure_inf
|
|
| 567 |
break
|
| 568 |
|
| 569 |
if layers is None:
|
| 570 |
-
print("โ Cannot find layers
|
| 571 |
return model, 0, 0
|
| 572 |
|
| 573 |
total_layers = len(layers)
|
| 574 |
-
print(f" Found {total_layers} layers
|
| 575 |
-
|
| 576 |
-
if structure_info and structure_info.get('gqa_detected'):
|
| 577 |
-
print(f" โ
GQA detected from structure info")
|
| 578 |
-
if not hasattr(model.config, 'num_key_value_heads'):
|
| 579 |
-
num_kv_heads = structure_info.get('k_dim', 0) // (model.config.hidden_size // model.config.num_attention_heads)
|
| 580 |
-
if num_kv_heads > 0:
|
| 581 |
-
model.config.num_key_value_heads = num_kv_heads
|
| 582 |
-
print(f" Set num_key_value_heads = {num_kv_heads}")
|
| 583 |
|
| 584 |
if structure_info and structure_info.get('head_dim'):
|
| 585 |
model.config.head_dim = structure_info['head_dim']
|
| 586 |
-
print(f" โ
Set head_dim = {structure_info['head_dim']} from structure info")
|
| 587 |
-
elif not hasattr(model.config, 'head_dim'):
|
| 588 |
-
first_layer = layers[0]
|
| 589 |
-
if hasattr(first_layer, 'self_attn'):
|
| 590 |
-
old_attn = first_layer.self_attn
|
| 591 |
-
|
| 592 |
-
if hasattr(old_attn, 'q_proj'):
|
| 593 |
-
q_shape = old_attn.q_proj.weight.shape
|
| 594 |
-
k_shape = old_attn.k_proj.weight.shape
|
| 595 |
-
|
| 596 |
-
head_dim = q_shape[0] // model.config.num_attention_heads
|
| 597 |
-
model.config.head_dim = head_dim
|
| 598 |
-
print(f" โ
Calculated head_dim = {head_dim} from layer weights")
|
| 599 |
-
|
| 600 |
-
if k_shape[0] != q_shape[0]:
|
| 601 |
-
print(f" โ
GQA detected! (K/V dim: {k_shape[0]} < Q dim: {q_shape[0]})")
|
| 602 |
-
if not hasattr(model.config, 'num_key_value_heads'):
|
| 603 |
-
num_kv_heads = k_shape[0] // head_dim
|
| 604 |
-
model.config.num_key_value_heads = num_kv_heads
|
| 605 |
-
print(f" Set num_key_value_heads = {num_kv_heads}")
|
| 606 |
|
| 607 |
for layer_idx, layer in enumerate(layers):
|
| 608 |
try:
|
|
@@ -616,60 +488,19 @@ def replace_attention_with_retention(model, use_hierarchical=True, structure_inf
|
|
| 616 |
|
| 617 |
if hasattr(old_attn, 'q_proj'):
|
| 618 |
try:
|
| 619 |
-
if use_hierarchical
|
| 620 |
-
target = new_retention.base_retention
|
| 621 |
-
else:
|
| 622 |
-
target = new_retention
|
| 623 |
-
|
| 624 |
-
q_match = old_attn.q_proj.weight.shape == target.q_proj.weight.shape
|
| 625 |
-
k_match = old_attn.k_proj.weight.shape == target.k_proj.weight.shape
|
| 626 |
-
v_match = old_attn.v_proj.weight.shape == target.v_proj.weight.shape
|
| 627 |
-
o_match = old_attn.o_proj.weight.shape == target.o_proj.weight.shape
|
| 628 |
-
|
| 629 |
-
if layer_idx == 0:
|
| 630 |
-
print(f" ๐ Layer 0 shape analysis:")
|
| 631 |
-
print(f" Old Q: {old_attn.q_proj.weight.shape} vs New Q: {target.q_proj.weight.shape} โ {'โ
' if q_match else 'โ'}")
|
| 632 |
-
print(f" Old K: {old_attn.k_proj.weight.shape} vs New K: {target.k_proj.weight.shape} โ {'โ
' if k_match else 'โ'}")
|
| 633 |
-
print(f" Old V: {old_attn.v_proj.weight.shape} vs New V: {target.v_proj.weight.shape} โ {'โ
' if v_match else 'โ'}")
|
| 634 |
-
print(f" Old O: {old_attn.o_proj.weight.shape} vs New O: {target.o_proj.weight.shape} โ {'โ
' if o_match else 'โ'}")
|
| 635 |
-
|
| 636 |
-
if q_match and k_match and v_match and o_match:
|
| 637 |
-
target.q_proj.weight.data = old_attn.q_proj.weight.data.clone()
|
| 638 |
-
target.k_proj.weight.data = old_attn.k_proj.weight.data.clone()
|
| 639 |
-
target.v_proj.weight.data = old_attn.v_proj.weight.data.clone()
|
| 640 |
-
target.o_proj.weight.data = old_attn.o_proj.weight.data.clone()
|
| 641 |
-
if layer_idx == 0:
|
| 642 |
-
print(f" โ
Layer {layer_idx}: Perfect match - weights copied")
|
| 643 |
|
| 644 |
-
|
| 645 |
-
|
| 646 |
-
|
| 647 |
-
|
| 648 |
-
|
| 649 |
-
|
| 650 |
-
|
| 651 |
-
target.k_proj.weight.data[:k_copy_size] = old_attn.k_proj.weight.data[:k_copy_size].clone()
|
| 652 |
-
target.v_proj.weight.data[:v_copy_size] = old_attn.v_proj.weight.data[:v_copy_size].clone()
|
| 653 |
-
|
| 654 |
-
if layer_idx == 0:
|
| 655 |
-
print(f" โ
Layer {layer_idx}: Partial match (GQA) - partial weights copied")
|
| 656 |
-
|
| 657 |
-
else:
|
| 658 |
-
nn.init.xavier_uniform_(target.q_proj.weight)
|
| 659 |
-
nn.init.xavier_uniform_(target.k_proj.weight)
|
| 660 |
-
nn.init.xavier_uniform_(target.v_proj.weight)
|
| 661 |
-
nn.init.xavier_uniform_(target.o_proj.weight)
|
| 662 |
-
if layer_idx == 0:
|
| 663 |
-
print(f" โ ๏ธ Layer {layer_idx}: Shape mismatch - Xavier init used")
|
| 664 |
-
|
| 665 |
-
except Exception as e:
|
| 666 |
-
print(f" โ ๏ธ Layer {layer_idx}: Weight copy failed - {e}")
|
| 667 |
|
| 668 |
layer.self_attn = new_retention
|
| 669 |
replaced_count += 1
|
| 670 |
|
| 671 |
except Exception as e:
|
| 672 |
-
print(f" โ Layer {layer_idx}: Failed - {e}")
|
| 673 |
continue
|
| 674 |
|
| 675 |
print(f"\nโ
Conversion complete: {replaced_count}/{total_layers} layers")
|
|
@@ -678,18 +509,171 @@ def replace_attention_with_retention(model, use_hierarchical=True, structure_inf
|
|
| 678 |
|
| 679 |
|
| 680 |
# =====================================================
|
| 681 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 682 |
# =====================================================
|
| 683 |
|
| 684 |
def generate_modeling_phoenix_code():
|
| 685 |
-
"""PHOENIX Custom Modeling Code
|
| 686 |
|
| 687 |
return '''"""
|
| 688 |
-
PHOENIX Retention Model
|
| 689 |
-
โ
|
| 690 |
-
โ
v1.4.3
|
| 691 |
-
โ
|
| 692 |
-
โ
๋ชจ๋ Retention ํด๋์ค ์์ ๊ตฌํ
|
| 693 |
"""
|
| 694 |
|
| 695 |
import torch
|
|
@@ -703,7 +687,7 @@ import os
|
|
| 703 |
|
| 704 |
class PhoenixConfig(PretrainedConfig):
|
| 705 |
model_type = "phoenix"
|
| 706 |
-
def __init__(self, use_phoenix_retention=True, phoenix_version="
|
| 707 |
original_model=None, use_hierarchical=True, **kwargs):
|
| 708 |
super().__init__(**kwargs)
|
| 709 |
self.use_phoenix_retention = use_phoenix_retention
|
|
@@ -735,21 +719,10 @@ class MultiScaleRetention(nn.Module):
|
|
| 735 |
if n == 1: return x
|
| 736 |
return x[:, :, None, :, :].expand(b, h, n, s, d).reshape(b, h*n, s, d)
|
| 737 |
|
| 738 |
-
def forward(
|
| 739 |
-
self,
|
| 740 |
-
hidden_states: torch.Tensor,
|
| 741 |
-
attention_mask: Optional[torch.Tensor] = None,
|
| 742 |
-
position_ids: Optional[torch.Tensor] = None,
|
| 743 |
-
past_key_value: Optional[Tuple[torch.Tensor]] = None,
|
| 744 |
-
output_attentions: bool = False,
|
| 745 |
-
use_cache: bool = False,
|
| 746 |
-
cache_position: Optional[torch.Tensor] = None,
|
| 747 |
-
**kwargs
|
| 748 |
-
):
|
| 749 |
b, s, _ = hidden_states.shape
|
| 750 |
device, dtype = hidden_states.device, hidden_states.dtype
|
| 751 |
|
| 752 |
-
# โ
FIX: dtype๊ณผ device ๋ชจ๋ ์ผ์น์ํด
|
| 753 |
if self.q_proj.weight.device != device or self.q_proj.weight.dtype != dtype:
|
| 754 |
self.to(device=device, dtype=dtype)
|
| 755 |
|
|
@@ -790,22 +763,11 @@ class HierarchicalRetention(nn.Module):
|
|
| 790 |
self.norm = nn.LayerNorm(h)
|
| 791 |
self.decays = [0.5, 0.8, 0.95]
|
| 792 |
|
| 793 |
-
def forward(
|
| 794 |
-
self,
|
| 795 |
-
hidden_states: torch.Tensor,
|
| 796 |
-
attention_mask: Optional[torch.Tensor] = None,
|
| 797 |
-
position_ids: Optional[torch.Tensor] = None,
|
| 798 |
-
past_key_value: Optional[Tuple[torch.Tensor]] = None,
|
| 799 |
-
output_attentions: bool = False,
|
| 800 |
-
use_cache: bool = False,
|
| 801 |
-
cache_position: Optional[torch.Tensor] = None,
|
| 802 |
-
**kwargs
|
| 803 |
-
):
|
| 804 |
b, s, h = hidden_states.shape
|
| 805 |
device, dtype = hidden_states.device, hidden_states.dtype
|
| 806 |
|
| 807 |
-
|
| 808 |
-
if next(self.short_proj.parameters()).device != device or next(self.short_proj.parameters()).dtype != dtype:
|
| 809 |
self.to(device=device, dtype=dtype)
|
| 810 |
|
| 811 |
ret_out = self.base_retention(hidden_states)[0]
|
|
@@ -825,10 +787,9 @@ class HierarchicalRetention(nn.Module):
|
|
| 825 |
|
| 826 |
def replace_attention_with_retention_for_loading(model, use_hierarchical=True):
|
| 827 |
layers = getattr(model, 'model', model)
|
| 828 |
-
layers = getattr(layers, 'layers', getattr(layers, 'h',
|
| 829 |
if layers is None: return model, 0, 0
|
| 830 |
|
| 831 |
-
# โ
FIX: ์๋ณธ ๋ชจ๋ธ์ dtype ๊ฐ์ง
|
| 832 |
original_dtype = None
|
| 833 |
for param in model.parameters():
|
| 834 |
original_dtype = param.dtype
|
|
@@ -837,33 +798,16 @@ def replace_attention_with_retention_for_loading(model, use_hierarchical=True):
|
|
| 837 |
cnt = 0
|
| 838 |
for i, layer in enumerate(layers):
|
| 839 |
if hasattr(layer, 'self_attn'):
|
| 840 |
-
|
| 841 |
-
|
| 842 |
-
|
| 843 |
-
# โ
FIX: ์๋ณธ dtype์ผ๋ก ๋ณํ
|
| 844 |
-
if original_dtype is not None:
|
| 845 |
-
new_retention = new_retention.to(dtype=original_dtype)
|
| 846 |
-
|
| 847 |
-
layer.self_attn = new_retention
|
| 848 |
cnt += 1
|
| 849 |
return model, cnt, len(layers)
|
| 850 |
|
| 851 |
|
| 852 |
-
# โ
CRITICAL: PhoenixPreTrainedModel ๋ฒ ์ด์ค ํด๋์ค
|
| 853 |
class PhoenixPreTrainedModel(PreTrainedModel):
|
| 854 |
config_class = PhoenixConfig
|
| 855 |
base_model_prefix = "phoenix"
|
| 856 |
-
supports_gradient_checkpointing = True
|
| 857 |
-
_no_split_modules = ["MultiScaleRetention", "HierarchicalRetention"]
|
| 858 |
-
|
| 859 |
-
def _init_weights(self, m):
|
| 860 |
-
std = getattr(self.config, 'initializer_range', 0.02)
|
| 861 |
-
if isinstance(m, nn.Linear):
|
| 862 |
-
m.weight.data.normal_(0, std)
|
| 863 |
-
if m.bias is not None: m.bias.data.zero_()
|
| 864 |
-
elif isinstance(m, nn.Embedding):
|
| 865 |
-
m.weight.data.normal_(0, std)
|
| 866 |
-
if m.padding_idx: m.weight.data[m.padding_idx].zero_()
|
| 867 |
|
| 868 |
|
| 869 |
class PhoenixModelForCausalLM(PhoenixPreTrainedModel):
|
|
@@ -874,7 +818,7 @@ class PhoenixModelForCausalLM(PhoenixPreTrainedModel):
|
|
| 874 |
|
| 875 |
@classmethod
|
| 876 |
def from_pretrained(cls, path, *args, **kwargs):
|
| 877 |
-
print(f"๐ฅ PHOENIX
|
| 878 |
config = AutoConfig.from_pretrained(path, trust_remote_code=True)
|
| 879 |
orig = getattr(config, 'original_model', 'Qwen/Qwen3-0.6B')
|
| 880 |
hier = getattr(config, 'use_hierarchical', True)
|
|
@@ -888,7 +832,6 @@ class PhoenixModelForCausalLM(PhoenixPreTrainedModel):
|
|
| 888 |
model, conv, tot = replace_attention_with_retention_for_loading(model, hier)
|
| 889 |
print(f" โ
Converted {conv}/{tot} layers")
|
| 890 |
|
| 891 |
-
# ๊ฐ์ค์น ๋ก๋
|
| 892 |
sd = None
|
| 893 |
if os.path.exists(path):
|
| 894 |
for fname in ["model.safetensors", "pytorch_model.bin"]:
|
|
@@ -925,7 +868,7 @@ class PhoenixModelForCausalLM(PhoenixPreTrainedModel):
|
|
| 925 |
inst = cls(config)
|
| 926 |
inst._model = model
|
| 927 |
inst._ready = True
|
| 928 |
-
print(f"โ
PHOENIX
|
| 929 |
return inst
|
| 930 |
|
| 931 |
def forward(self, *a, **k):
|
|
@@ -939,132 +882,61 @@ class PhoenixModelForCausalLM(PhoenixPreTrainedModel):
|
|
| 939 |
|
| 940 |
AutoConfig.register("phoenix", PhoenixConfig)
|
| 941 |
'''
|
| 942 |
-
|
| 943 |
-
return modeling_code
|
| 944 |
|
| 945 |
|
| 946 |
# =====================================================
|
| 947 |
-
# ์ ์ฅ ํจ์
|
| 948 |
# =====================================================
|
| 949 |
|
| 950 |
def save_phoenix_model_with_code(model, tokenizer, output_path, original_model_url, metadata):
|
| 951 |
-
"""PHOENIX
|
| 952 |
output_path = Path(output_path)
|
| 953 |
output_path.mkdir(parents=True, exist_ok=True)
|
| 954 |
|
| 955 |
-
print(f"\n๐พ Saving PHOENIX model
|
| 956 |
|
| 957 |
-
#
|
| 958 |
if hasattr(model.config, 'tie_word_embeddings') and model.config.tie_word_embeddings:
|
| 959 |
-
|
| 960 |
-
|
| 961 |
-
if hasattr(model, 'lm_head') and hasattr(model, 'model'):
|
| 962 |
-
if hasattr(model.model, 'embed_tokens'):
|
| 963 |
-
is_already_tied = model.lm_head.weight is model.model.embed_tokens.weight
|
| 964 |
-
|
| 965 |
-
if not is_already_tied:
|
| 966 |
-
print(f" โ ๏ธ lm_head and embed_tokens are NOT tied - fixing now...")
|
| 967 |
-
print(f" Before: lm_head mean={model.lm_head.weight.mean():.6f}, std={model.lm_head.weight.std():.6f}")
|
| 968 |
-
|
| 969 |
-
# CRITICAL: Tie the weights
|
| 970 |
-
model.lm_head.weight = model.model.embed_tokens.weight
|
| 971 |
-
|
| 972 |
-
print(f" After: lm_head mean={model.lm_head.weight.mean():.6f}, std={model.lm_head.weight.std():.6f}")
|
| 973 |
-
print(f" โ
Successfully tied lm_head.weight to embed_tokens.weight")
|
| 974 |
-
else:
|
| 975 |
-
print(f" โ
Already tied (lm_head is embed_tokens)")
|
| 976 |
-
|
| 977 |
-
final_tied = model.lm_head.weight is model.model.embed_tokens.weight
|
| 978 |
-
print(f" ๐ Final verification: Tied = {final_tied}")
|
| 979 |
-
|
| 980 |
-
if not final_tied:
|
| 981 |
-
print(f" โ WARNING: Tying verification FAILED!")
|
| 982 |
-
else:
|
| 983 |
-
print(f" โ
Tying verification PASSED")
|
| 984 |
-
else:
|
| 985 |
-
print(f" โ ๏ธ tie_word_embeddings not enabled or not found")
|
| 986 |
|
| 987 |
-
# ๋ชจ๋ธ๊ณผ ํ ํฌ๋์ด์ ์ ์ฅ
|
| 988 |
model.save_pretrained(output_path)
|
| 989 |
tokenizer.save_pretrained(output_path)
|
| 990 |
-
print(f" โ
Model weights saved")
|
| 991 |
|
| 992 |
-
# Custom
|
| 993 |
modeling_code = generate_modeling_phoenix_code()
|
| 994 |
-
with open(output_path / "modeling_phoenix.py", "w"
|
| 995 |
f.write(modeling_code)
|
| 996 |
-
print(f" โ
Custom modeling code saved (modeling_phoenix.py)")
|
| 997 |
|
| 998 |
-
#
|
| 999 |
config_path = output_path / "config.json"
|
| 1000 |
if config_path.exists():
|
| 1001 |
-
with open(config_path, "r"
|
| 1002 |
config_dict = json.load(f)
|
| 1003 |
|
| 1004 |
config_dict["use_phoenix_retention"] = True
|
| 1005 |
-
config_dict["phoenix_version"] = "
|
| 1006 |
config_dict["original_model"] = original_model_url
|
| 1007 |
-
config_dict["use_hierarchical"] = metadata.get('use_hierarchical', True)
|
| 1008 |
-
|
| 1009 |
-
if hasattr(model.config, 'tie_word_embeddings'):
|
| 1010 |
-
config_dict["tie_word_embeddings"] = model.config.tie_word_embeddings
|
| 1011 |
-
|
| 1012 |
config_dict["auto_map"] = {
|
| 1013 |
"AutoModelForCausalLM": "modeling_phoenix.PhoenixModelForCausalLM",
|
| 1014 |
}
|
| 1015 |
|
| 1016 |
-
with open(config_path, "w"
|
| 1017 |
json.dump(config_dict, f, indent=2)
|
| 1018 |
-
print(f" โ
Config updated with PHOENIX markers and auto_map")
|
| 1019 |
|
| 1020 |
-
# Metadata
|
| 1021 |
-
|
| 1022 |
-
with open(output_path / 'phoenix_metadata.json', 'w', encoding='utf-8') as f:
|
| 1023 |
json.dump(metadata, f, indent=2)
|
| 1024 |
-
print(f" โ
Metadata saved")
|
| 1025 |
|
| 1026 |
-
# README
|
| 1027 |
-
|
| 1028 |
-
license: apache-2.0
|
| 1029 |
-
library_name: transformers
|
| 1030 |
-
tags:
|
| 1031 |
-
- PHOENIX
|
| 1032 |
-
- Retention
|
| 1033 |
-
- O(n) Complexity
|
| 1034 |
-
- VIDraft
|
| 1035 |
-
pipeline_tag: text-generation
|
| 1036 |
-
---
|
| 1037 |
-
|
| 1038 |
-
# ๐ฅ PHOENIX Retention Model v1.4.3
|
| 1039 |
-
|
| 1040 |
-
This model has been converted from [{original_model_url}]({original_model_url}) using PHOENIX Retention mechanism.
|
| 1041 |
-
|
| 1042 |
-
## โก What's New in v1.4.3
|
| 1043 |
-
|
| 1044 |
-
- โ
**CRITICAL FIX: forward() Signature** - Transformers ํธํ์ฑ ์๋ฒฝ ์์
|
| 1045 |
-
- โ
**Generation Fixed** - ์ ์์ ์ธ ํ
์คํธ ์์ฑ
|
| 1046 |
-
- โ
**Qwen3 Support** - ์์ ๋ชจ๋ธ ์๋ฒฝ ์ง์
|
| 1047 |
-
- โ
**Embedding Tying** - ์๋ ์ฒ๋ฆฌ
|
| 1048 |
-
|
| 1049 |
-
## Model Information
|
| 1050 |
-
|
| 1051 |
-
- **Original Model**: {original_model_url}
|
| 1052 |
-
- **PHOENIX Version**: 1.4.3
|
| 1053 |
-
- **Conversion Rate**: {metadata.get('conversion_rate', 0)*100:.1f}%
|
| 1054 |
-
- **Quality Score**: {metadata.get('quality_score', 0):.2f}/1.00
|
| 1055 |
-
- **Burning Type**: {metadata.get('burning_type', 'zero_shot')}
|
| 1056 |
-
- **Hierarchical**: {metadata.get('use_hierarchical', True)}
|
| 1057 |
|
| 1058 |
## Features
|
| 1059 |
-
|
| 1060 |
-
โ
|
| 1061 |
-
โ
|
| 1062 |
-
โ
**Hierarchical Memory**: Multi-scale temporal dependencies
|
| 1063 |
-
โ
**Fixed forward() Signature**: Perfect Transformers compatibility
|
| 1064 |
|
| 1065 |
## Usage
|
| 1066 |
-
|
| 1067 |
-
### โ ๏ธ Important: trust_remote_code=True Required!
|
| 1068 |
```python
|
| 1069 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 1070 |
|
|
@@ -1074,148 +946,40 @@ model = AutoModelForCausalLM.from_pretrained(
|
|
| 1074 |
torch_dtype="auto",
|
| 1075 |
device_map="auto"
|
| 1076 |
)
|
| 1077 |
-
tokenizer = AutoTokenizer.from_pretrained("{output_path.name}")
|
| 1078 |
-
|
| 1079 |
-
inputs = tokenizer("The future of AI is", return_tensors="pt")
|
| 1080 |
-
outputs = model.generate(**inputs, max_new_tokens=50)
|
| 1081 |
-
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
|
| 1082 |
-
```
|
| 1083 |
-
|
| 1084 |
-
## Citation
|
| 1085 |
-
```bibtex
|
| 1086 |
-
@software{{phoenix_retention,
|
| 1087 |
-
title = {{PHOENIX Retention Research Platform}},
|
| 1088 |
-
author = {{VIDraft AI Research Lab}},
|
| 1089 |
-
year = {{2025}},
|
| 1090 |
-
url = {{https://github.com/vidraft}},
|
| 1091 |
-
version = {{1.4.3}}
|
| 1092 |
-
}}
|
| 1093 |
```
|
| 1094 |
|
| 1095 |
-
|
| 1096 |
-
|
| 1097 |
-
Apache 2.0 (inherited from original model)
|
| 1098 |
-
|
| 1099 |
-
---
|
| 1100 |
-
|
| 1101 |
-
**VIDraft AI Research Lab** | Powered by PHOENIX ๐ฅ v1.4.3
|
| 1102 |
"""
|
| 1103 |
|
| 1104 |
-
with open(output_path / "README.md", "w"
|
| 1105 |
-
f.write(
|
| 1106 |
-
print(f" โ
README.md created")
|
| 1107 |
|
| 1108 |
-
print(f"
|
| 1109 |
-
print(f" ๐ฆ Location: {output_path}")
|
| 1110 |
|
| 1111 |
|
| 1112 |
# =====================================================
|
| 1113 |
-
#
|
| 1114 |
# =====================================================
|
| 1115 |
|
| 1116 |
-
def verify_phoenix_model_before_upload(model_path: str) -> Tuple[bool, str, Dict]:
|
| 1117 |
-
"""Upload ์ PHOENIX ๋ชจ๋ธ ๊ฒ์ฆ"""
|
| 1118 |
-
print("\n๐งช Pre-upload Verification...")
|
| 1119 |
-
|
| 1120 |
-
try:
|
| 1121 |
-
model_path = Path(model_path)
|
| 1122 |
-
|
| 1123 |
-
file_checks = {
|
| 1124 |
-
'config': (model_path / 'config.json').exists(),
|
| 1125 |
-
'modeling': (model_path / 'modeling_phoenix.py').exists(),
|
| 1126 |
-
'readme': (model_path / 'README.md').exists(),
|
| 1127 |
-
'safetensors': (model_path / 'model.safetensors').exists(),
|
| 1128 |
-
'pytorch_bin': (model_path / 'pytorch_model.bin').exists(),
|
| 1129 |
-
}
|
| 1130 |
-
|
| 1131 |
-
model_weights_exist = file_checks['safetensors'] or file_checks['pytorch_bin']
|
| 1132 |
-
|
| 1133 |
-
print(f" ๐ File Check:")
|
| 1134 |
-
print(f" config.json: {'โ
' if file_checks['config'] else 'โ'}")
|
| 1135 |
-
print(f" modeling_phoenix.py: {'โ
' if file_checks['modeling'] else 'โ'}")
|
| 1136 |
-
print(f" README.md: {'โ
' if file_checks['readme'] else 'โ'}")
|
| 1137 |
-
print(f" model weights: {'โ
' if model_weights_exist else 'โ'}")
|
| 1138 |
-
|
| 1139 |
-
if not file_checks['config'] or not file_checks['modeling'] or not model_weights_exist:
|
| 1140 |
-
return False, "โ Missing required files", {}
|
| 1141 |
-
|
| 1142 |
-
with open(model_path / 'config.json', 'r') as f:
|
| 1143 |
-
config = json.load(f)
|
| 1144 |
-
|
| 1145 |
-
if not config.get('use_phoenix_retention'):
|
| 1146 |
-
return False, "โ PHOENIX marker not found", {}
|
| 1147 |
-
|
| 1148 |
-
if 'auto_map' not in config:
|
| 1149 |
-
return False, "โ auto_map not configured", {}
|
| 1150 |
-
|
| 1151 |
-
print(" โ
Config validated")
|
| 1152 |
-
|
| 1153 |
-
metrics = {
|
| 1154 |
-
'retention_layers': -1,
|
| 1155 |
-
'total_layers': -1,
|
| 1156 |
-
'retention_rate': 1.0,
|
| 1157 |
-
'generation_quality': 0.8,
|
| 1158 |
-
'model_format': 'safetensors' if file_checks['safetensors'] else 'pytorch_bin',
|
| 1159 |
-
'verification_mode': 'file_only'
|
| 1160 |
-
}
|
| 1161 |
-
|
| 1162 |
-
print(" โ
File-based verification passed")
|
| 1163 |
-
return True, "โ
All checks passed", metrics
|
| 1164 |
-
|
| 1165 |
-
except Exception as e:
|
| 1166 |
-
import traceback
|
| 1167 |
-
error_msg = traceback.format_exc()
|
| 1168 |
-
return False, f"โ Verification failed: {str(e)}\n{error_msg}", {}
|
| 1169 |
-
|
| 1170 |
-
|
| 1171 |
def upload_to_huggingface_hub(
|
| 1172 |
model_path: str,
|
| 1173 |
original_model_url: str,
|
| 1174 |
repo_name: str = None,
|
| 1175 |
private: bool = True,
|
| 1176 |
token: str = None,
|
| 1177 |
-
skip_verification: bool = False
|
| 1178 |
) -> Tuple[bool, str, str]:
|
| 1179 |
-
"""Upload PHOENIX model to
|
| 1180 |
-
|
| 1181 |
-
print("\n" + "="*80)
|
| 1182 |
-
print("๐ค HUGGINGFACE HUB UPLOAD")
|
| 1183 |
-
print("="*80)
|
| 1184 |
|
| 1185 |
if token is None:
|
| 1186 |
token = HF_TOKEN
|
| 1187 |
|
| 1188 |
if not token:
|
| 1189 |
-
|
| 1190 |
-
print(f"\n{error_msg}")
|
| 1191 |
-
return False, "", error_msg
|
| 1192 |
-
|
| 1193 |
-
print(f"โ
HF_TOKEN found: {'*' * 10}{token[-4:]}")
|
| 1194 |
-
|
| 1195 |
-
model_path = Path(model_path)
|
| 1196 |
-
if not model_path.exists():
|
| 1197 |
-
error_msg = f"โ Model path not found: {model_path}"
|
| 1198 |
-
print(f"\n{error_msg}")
|
| 1199 |
-
return False, "", error_msg
|
| 1200 |
-
|
| 1201 |
-
if not skip_verification:
|
| 1202 |
-
print("\n๐ Running pre-upload verification...")
|
| 1203 |
-
success, message, metrics = verify_phoenix_model_before_upload(str(model_path))
|
| 1204 |
-
|
| 1205 |
-
if not success:
|
| 1206 |
-
error_msg = f"โ Pre-upload verification failed:\n{message}"
|
| 1207 |
-
print(f"\n{error_msg}")
|
| 1208 |
-
return False, "", error_msg
|
| 1209 |
-
|
| 1210 |
-
print(f"โ
Pre-upload verification PASSED!")
|
| 1211 |
|
| 1212 |
try:
|
| 1213 |
-
print("\n๐ Authenticating with HuggingFace...")
|
| 1214 |
api = HfApi(token=token)
|
| 1215 |
-
|
| 1216 |
user_info = api.whoami(token=token)
|
| 1217 |
username = user_info['name']
|
| 1218 |
-
print(f"โ
Authenticated as: {username}")
|
| 1219 |
|
| 1220 |
if not repo_name:
|
| 1221 |
base_name = original_model_url.split('/')[-1]
|
|
@@ -1223,7 +987,6 @@ def upload_to_huggingface_hub(
|
|
| 1223 |
|
| 1224 |
repo_id = f"{username}/{repo_name}"
|
| 1225 |
|
| 1226 |
-
print(f"\n๐ฆ Creating/verifying repository...")
|
| 1227 |
create_repo(
|
| 1228 |
repo_id=repo_id,
|
| 1229 |
token=token,
|
|
@@ -1231,9 +994,7 @@ def upload_to_huggingface_hub(
|
|
| 1231 |
repo_type="model",
|
| 1232 |
exist_ok=True
|
| 1233 |
)
|
| 1234 |
-
print(f"โ
Repository ready: {repo_id}")
|
| 1235 |
|
| 1236 |
-
print(f"\n๐ค Uploading files...")
|
| 1237 |
api.upload_folder(
|
| 1238 |
folder_path=str(model_path),
|
| 1239 |
repo_id=repo_id,
|
|
@@ -1243,37 +1004,23 @@ def upload_to_huggingface_hub(
|
|
| 1243 |
|
| 1244 |
hub_url = f"https://huggingface.co/{repo_id}"
|
| 1245 |
|
| 1246 |
-
|
| 1247 |
-
print(f"โ
UPLOAD SUCCESSFUL!")
|
| 1248 |
-
print(f"{'='*80}")
|
| 1249 |
-
print(f"๐ Model URL: {hub_url}")
|
| 1250 |
-
print(f"{'='*80}\n")
|
| 1251 |
-
|
| 1252 |
-
return True, hub_url, f"โ
Successfully uploaded to {hub_url}"
|
| 1253 |
|
| 1254 |
except Exception as e:
|
| 1255 |
-
|
| 1256 |
-
error_msg = traceback.format_exc()
|
| 1257 |
-
print(f"\n{'='*80}")
|
| 1258 |
-
print(f"โ UPLOAD FAILED")
|
| 1259 |
-
print(f"{'='*80}")
|
| 1260 |
-
print(f"{error_msg}")
|
| 1261 |
-
print(f"{'='*80}\n")
|
| 1262 |
-
return False, "", f"โ Upload failed: {str(e)}\n\n{error_msg}"
|
| 1263 |
|
| 1264 |
|
| 1265 |
# =====================================================
|
| 1266 |
# ํ๊ฐ ํจ์
|
| 1267 |
# =====================================================
|
| 1268 |
|
| 1269 |
-
def evaluate_model_quality(model, tokenizer
|
| 1270 |
-
"""
|
| 1271 |
-
|
| 1272 |
-
|
| 1273 |
-
|
| 1274 |
-
|
| 1275 |
-
|
| 1276 |
-
]
|
| 1277 |
|
| 1278 |
model.eval()
|
| 1279 |
scores = []
|
|
@@ -1293,46 +1040,46 @@ def evaluate_model_quality(model, tokenizer, test_prompts=None):
|
|
| 1293 |
score = 0.0
|
| 1294 |
if len(generated) > len(prompt):
|
| 1295 |
score += 0.3
|
| 1296 |
-
if not any(
|
| 1297 |
score += 0.3
|
| 1298 |
if len(generated.split()) > len(prompt.split()) + 2:
|
| 1299 |
score += 0.4
|
| 1300 |
|
| 1301 |
scores.append(score)
|
| 1302 |
-
except
|
| 1303 |
-
print(f" โ ๏ธ Evaluation error for '{prompt}': {e}")
|
| 1304 |
scores.append(0.0)
|
| 1305 |
|
| 1306 |
return sum(scores) / len(scores) if scores else 0.0
|
| 1307 |
|
| 1308 |
|
| 1309 |
# =====================================================
|
| 1310 |
-
# ๋ฒ๋
|
| 1311 |
# =====================================================
|
| 1312 |
|
| 1313 |
-
def
|
| 1314 |
model_url: str,
|
| 1315 |
output_dir: str,
|
| 1316 |
use_hierarchical: bool = True,
|
| 1317 |
-
|
|
|
|
|
|
|
|
|
|
| 1318 |
):
|
| 1319 |
-
"""Zero-shot
|
| 1320 |
print("="*80)
|
| 1321 |
-
print("๐ฅ PHOENIX
|
| 1322 |
print("="*80)
|
| 1323 |
|
| 1324 |
output_path = Path(output_dir)
|
| 1325 |
output_path.mkdir(parents=True, exist_ok=True)
|
| 1326 |
|
| 1327 |
try:
|
| 1328 |
-
|
|
|
|
| 1329 |
structure_info = analyze_model_structure(model_url)
|
| 1330 |
|
| 1331 |
-
|
| 1332 |
-
|
| 1333 |
-
structure_info = None
|
| 1334 |
-
|
| 1335 |
-
print(f"\n๐ฅ STEP 2: Loading model for conversion...")
|
| 1336 |
start_time = time.time()
|
| 1337 |
|
| 1338 |
config = AutoConfig.from_pretrained(model_url, trust_remote_code=True)
|
|
@@ -1349,6 +1096,7 @@ def burn_model_zero_shot(
|
|
| 1349 |
load_time = time.time() - start_time
|
| 1350 |
print(f"โ
Loaded in {load_time:.1f}s")
|
| 1351 |
|
|
|
|
| 1352 |
print(f"\n๐ STEP 3: Converting Attention โ Retention...")
|
| 1353 |
convert_start = time.time()
|
| 1354 |
|
|
@@ -1361,40 +1109,48 @@ def burn_model_zero_shot(
|
|
| 1361 |
convert_time = time.time() - convert_start
|
| 1362 |
conversion_rate = converted / total if total > 0 else 0
|
| 1363 |
|
| 1364 |
-
print(f"โ
Converted {converted}/{total} layers
|
| 1365 |
-
|
| 1366 |
-
if converted == 0:
|
| 1367 |
-
print(f"\nโ ๏ธ WARNING: No layers were converted!")
|
| 1368 |
-
|
| 1369 |
-
print(f"\n๐ STEP 4: Evaluating model quality...")
|
| 1370 |
-
eval_start = time.time()
|
| 1371 |
|
| 1372 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1373 |
|
| 1374 |
-
|
| 1375 |
-
print(f"
|
|
|
|
|
|
|
| 1376 |
|
| 1377 |
-
|
| 1378 |
-
|
| 1379 |
|
| 1380 |
metadata = {
|
| 1381 |
-
'phoenix_version': '
|
| 1382 |
'original_model': model_url,
|
| 1383 |
'use_hierarchical': use_hierarchical,
|
| 1384 |
'conversion_rate': conversion_rate,
|
| 1385 |
-
'layers_converted': converted,
|
| 1386 |
-
'total_layers': total,
|
| 1387 |
'quality_score': quality_score,
|
| 1388 |
-
'
|
| 1389 |
-
'
|
| 1390 |
'timestamp': datetime.now().isoformat(),
|
| 1391 |
}
|
| 1392 |
|
| 1393 |
save_phoenix_model_with_code(model, tokenizer, output_path, model_url, metadata)
|
| 1394 |
|
| 1395 |
-
save_time = time.time() - save_start
|
| 1396 |
-
print(f"โ
Saved to {output_path} in {save_time:.1f}s")
|
| 1397 |
-
|
| 1398 |
total_time = time.time() - start_time
|
| 1399 |
|
| 1400 |
result = {
|
|
@@ -1403,124 +1159,73 @@ def burn_model_zero_shot(
|
|
| 1403 |
'conversion_rate': conversion_rate,
|
| 1404 |
'quality_score': quality_score,
|
| 1405 |
'total_time': total_time,
|
| 1406 |
-
'
|
| 1407 |
-
'convert_time': convert_time,
|
| 1408 |
-
'eval_time': eval_time,
|
| 1409 |
-
'save_time': save_time,
|
| 1410 |
'structure_info': structure_info,
|
| 1411 |
}
|
| 1412 |
|
| 1413 |
print(f"\n{'='*80}")
|
| 1414 |
-
print(f"โ
|
| 1415 |
-
print(f"
|
| 1416 |
-
print(f" Model Path: {output_path}")
|
| 1417 |
print(f" Quality: {quality_score:.2f}/1.00")
|
| 1418 |
-
print(f"
|
| 1419 |
print(f"{'='*80}\n")
|
| 1420 |
|
| 1421 |
return result
|
| 1422 |
|
| 1423 |
except Exception as e:
|
| 1424 |
import traceback
|
| 1425 |
-
error_msg = traceback.format_exc()
|
| 1426 |
-
print(f"\nโ Zero-shot burning failed:\n{error_msg}")
|
| 1427 |
return {
|
| 1428 |
'status': 'failed',
|
| 1429 |
'error': str(e),
|
| 1430 |
-
'traceback':
|
| 1431 |
}
|
| 1432 |
|
| 1433 |
|
| 1434 |
# =====================================================
|
| 1435 |
-
#
|
| 1436 |
# =====================================================
|
| 1437 |
|
| 1438 |
class ExperimentDatabase:
|
| 1439 |
-
"""SQLite database"""
|
| 1440 |
-
|
| 1441 |
def __init__(self, db_path: str):
|
| 1442 |
self.db_path = db_path
|
| 1443 |
self.init_database()
|
| 1444 |
-
self.migrate_database()
|
| 1445 |
|
| 1446 |
def init_database(self):
|
| 1447 |
with sqlite3.connect(self.db_path) as conn:
|
| 1448 |
cursor = conn.cursor()
|
| 1449 |
-
cursor.execute("""
|
| 1450 |
-
CREATE TABLE IF NOT EXISTS experiments (
|
| 1451 |
-
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
| 1452 |
-
model_type TEXT NOT NULL,
|
| 1453 |
-
sequence_length INTEGER,
|
| 1454 |
-
use_hierarchical BOOLEAN,
|
| 1455 |
-
attention_replaced BOOLEAN,
|
| 1456 |
-
layers_converted INTEGER,
|
| 1457 |
-
total_layers INTEGER,
|
| 1458 |
-
elapsed_time REAL,
|
| 1459 |
-
memory_mb REAL,
|
| 1460 |
-
throughput REAL,
|
| 1461 |
-
config_json TEXT,
|
| 1462 |
-
metrics_json TEXT,
|
| 1463 |
-
timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
|
| 1464 |
-
)
|
| 1465 |
-
""")
|
| 1466 |
-
|
| 1467 |
cursor.execute("""
|
| 1468 |
CREATE TABLE IF NOT EXISTS burning_history (
|
| 1469 |
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
| 1470 |
-
model_url TEXT
|
| 1471 |
-
output_path TEXT
|
| 1472 |
hub_url TEXT,
|
| 1473 |
-
use_hierarchical BOOLEAN,
|
| 1474 |
-
dataset_used BOOLEAN,
|
| 1475 |
conversion_rate REAL,
|
| 1476 |
-
|
| 1477 |
-
|
| 1478 |
-
evaluation_score REAL,
|
| 1479 |
-
verification_passed BOOLEAN,
|
| 1480 |
timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
|
| 1481 |
)
|
| 1482 |
""")
|
| 1483 |
conn.commit()
|
| 1484 |
|
| 1485 |
-
def
|
| 1486 |
-
with sqlite3.connect(self.db_path) as conn:
|
| 1487 |
-
cursor = conn.cursor()
|
| 1488 |
-
cursor.execute("PRAGMA table_info(burning_history)")
|
| 1489 |
-
columns = [col[1] for col in cursor.fetchall()]
|
| 1490 |
-
|
| 1491 |
-
if 'hub_url' not in columns:
|
| 1492 |
-
cursor.execute("ALTER TABLE burning_history ADD COLUMN hub_url TEXT")
|
| 1493 |
-
|
| 1494 |
-
if 'verification_passed' not in columns:
|
| 1495 |
-
cursor.execute("ALTER TABLE burning_history ADD COLUMN verification_passed BOOLEAN DEFAULT 0")
|
| 1496 |
-
|
| 1497 |
-
conn.commit()
|
| 1498 |
-
|
| 1499 |
-
def save_burning(self, burning_info: Dict) -> int:
|
| 1500 |
with sqlite3.connect(self.db_path) as conn:
|
| 1501 |
cursor = conn.cursor()
|
| 1502 |
cursor.execute("""
|
| 1503 |
-
INSERT INTO burning_history
|
| 1504 |
-
|
| 1505 |
-
|
| 1506 |
-
final_loss, evaluation_score, verification_passed
|
| 1507 |
-
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
| 1508 |
""", (
|
| 1509 |
-
|
| 1510 |
-
|
| 1511 |
-
|
| 1512 |
-
|
| 1513 |
-
|
| 1514 |
-
|
| 1515 |
-
burning_info.get('training_steps', 0),
|
| 1516 |
-
burning_info.get('final_loss'),
|
| 1517 |
-
burning_info.get('evaluation_score'),
|
| 1518 |
-
burning_info.get('verification_passed', False),
|
| 1519 |
))
|
| 1520 |
conn.commit()
|
| 1521 |
return cursor.lastrowid
|
| 1522 |
|
| 1523 |
-
def
|
| 1524 |
with sqlite3.connect(self.db_path) as conn:
|
| 1525 |
conn.row_factory = sqlite3.Row
|
| 1526 |
cursor = conn.cursor()
|
|
@@ -1528,420 +1233,211 @@ class ExperimentDatabase:
|
|
| 1528 |
return [dict(row) for row in cursor.fetchall()]
|
| 1529 |
|
| 1530 |
|
|
|
|
|
|
|
|
|
|
| 1531 |
# =====================================================
|
| 1532 |
-
# Gradio UI
|
| 1533 |
# =====================================================
|
| 1534 |
|
| 1535 |
def burn_phoenix_model_ui(
|
| 1536 |
model_url,
|
| 1537 |
use_hierarchical,
|
| 1538 |
-
dataset_path,
|
| 1539 |
output_name,
|
| 1540 |
-
|
| 1541 |
-
|
| 1542 |
-
|
| 1543 |
-
|
| 1544 |
-
|
| 1545 |
-
|
| 1546 |
-
hub_repo_name,
|
| 1547 |
hub_private,
|
| 1548 |
):
|
| 1549 |
-
"""Gradio UI
|
| 1550 |
-
|
| 1551 |
-
print("\n" + "="*80)
|
| 1552 |
-
print("๐ฅ PHOENIX MODEL BURNING START v1.4.3")
|
| 1553 |
-
print("="*80)
|
| 1554 |
|
| 1555 |
try:
|
| 1556 |
if not model_url.strip():
|
| 1557 |
-
return "โ ๏ธ Model URL
|
| 1558 |
|
| 1559 |
if not output_name.strip():
|
| 1560 |
output_name = f"phoenix_{model_url.split('/')[-1]}_{int(time.time())}"
|
| 1561 |
|
| 1562 |
output_dir = f"{MODELS_PATH}/{output_name}"
|
| 1563 |
|
| 1564 |
-
|
| 1565 |
-
|
| 1566 |
-
|
| 1567 |
-
|
| 1568 |
-
|
| 1569 |
|
| 1570 |
-
#
|
| 1571 |
-
result =
|
| 1572 |
model_url=model_url,
|
| 1573 |
output_dir=output_dir,
|
| 1574 |
use_hierarchical=use_hierarchical,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1575 |
)
|
| 1576 |
|
| 1577 |
if result['status'] != 'success':
|
| 1578 |
-
|
| 1579 |
-
return error_msg, None
|
| 1580 |
|
| 1581 |
-
#
|
| 1582 |
hub_url = None
|
| 1583 |
-
|
| 1584 |
-
|
| 1585 |
-
|
| 1586 |
-
|
| 1587 |
-
|
| 1588 |
-
|
| 1589 |
-
|
| 1590 |
-
success, hub_url, upload_msg = upload_to_huggingface_hub(
|
| 1591 |
-
model_path=result['model_path'],
|
| 1592 |
-
original_model_url=model_url,
|
| 1593 |
-
repo_name=hub_repo_name if hub_repo_name.strip() else None,
|
| 1594 |
-
private=hub_private,
|
| 1595 |
-
skip_verification=False
|
| 1596 |
-
)
|
| 1597 |
-
|
| 1598 |
-
verification_passed = success
|
| 1599 |
-
upload_status = f"โ
Uploaded to {hub_url}" if success else f"โ Upload failed"
|
| 1600 |
-
else:
|
| 1601 |
-
upload_status = "โญ๏ธ Skipped"
|
| 1602 |
|
| 1603 |
-
# DB
|
| 1604 |
-
|
| 1605 |
'model_url': model_url,
|
| 1606 |
'output_path': result['model_path'],
|
| 1607 |
'hub_url': hub_url,
|
| 1608 |
-
'
|
| 1609 |
-
'
|
| 1610 |
-
'
|
| 1611 |
-
|
| 1612 |
-
'final_loss': None,
|
| 1613 |
-
'evaluation_score': result.get('quality_score', 0.0),
|
| 1614 |
-
'verification_passed': verification_passed,
|
| 1615 |
-
}
|
| 1616 |
-
|
| 1617 |
-
db.save_burning(burning_info)
|
| 1618 |
-
|
| 1619 |
-
# ๊ฒฐ๊ณผ ํฌ๋งทํ
|
| 1620 |
-
structure_info = result.get('structure_info', {})
|
| 1621 |
|
|
|
|
| 1622 |
output_md = f"""
|
| 1623 |
-
# ๐ฅ
|
| 1624 |
-
|
| 1625 |
-
## ๐ Structure Analysis
|
| 1626 |
-
- **Model Type**: {structure_info.get('model_type', 'unknown')}
|
| 1627 |
-
- **Architecture**: {structure_info.get('architectures', 'unknown')}
|
| 1628 |
-
- **Total Layers**: {structure_info.get('total_layers', 0)}
|
| 1629 |
-
- **GQA Detected**: {structure_info.get('gqa_detected', False)}
|
| 1630 |
|
| 1631 |
-
##
|
| 1632 |
-
- **Original
|
| 1633 |
-
- **Output
|
| 1634 |
-
- **
|
| 1635 |
-
- **
|
|
|
|
| 1636 |
|
| 1637 |
-
##
|
| 1638 |
-
- **Conversion Rate**: {result.get('conversion_rate', 0)*100:.1f}%
|
| 1639 |
-
- **Quality Score**: {result.get('quality_score', 0):.2f}/1.00
|
| 1640 |
-
|
| 1641 |
-
## โฑ๏ธ Time Breakdown
|
| 1642 |
-
- **Total**: {result.get('total_time', 0):.1f}s
|
| 1643 |
-
- **Load**: {result.get('load_time', 0):.1f}s
|
| 1644 |
-
- **Convert**: {result.get('convert_time', 0):.1f}s
|
| 1645 |
-
- **Evaluate**: {result.get('eval_time', 0):.1f}s
|
| 1646 |
-
- **Save**: {result.get('save_time', 0):.1f}s
|
| 1647 |
-
|
| 1648 |
-
---
|
| 1649 |
-
|
| 1650 |
-
## ๐ HuggingFace Hub Upload
|
| 1651 |
-
|
| 1652 |
-
**Status**: {upload_status}
|
| 1653 |
"""
|
| 1654 |
|
| 1655 |
if hub_url:
|
| 1656 |
output_md += f"""
|
| 1657 |
-
**
|
| 1658 |
|
| 1659 |
-
### ๐ Load from Hub
|
| 1660 |
```python
|
| 1661 |
-
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 1662 |
-
|
| 1663 |
model = AutoModelForCausalLM.from_pretrained(
|
| 1664 |
"{hub_url.replace('https://huggingface.co/', '')}",
|
| 1665 |
-
trust_remote_code=True
|
| 1666 |
-
torch_dtype="auto",
|
| 1667 |
-
device_map="auto"
|
| 1668 |
)
|
| 1669 |
```
|
| 1670 |
"""
|
|
|
|
|
|
|
| 1671 |
|
| 1672 |
-
|
| 1673 |
-
---
|
| 1674 |
-
|
| 1675 |
-
โ
**PHOENIX Model Ready! (v1.4.3)**
|
| 1676 |
-
"""
|
| 1677 |
-
|
| 1678 |
-
# ํ๋กฏ
|
| 1679 |
fig = go.Figure()
|
| 1680 |
-
|
| 1681 |
-
metrics_names = ['Conversion', 'Quality']
|
| 1682 |
-
metrics_values = [result.get('conversion_rate', 0), result.get('quality_score', 0)]
|
| 1683 |
-
|
| 1684 |
-
if verification_passed:
|
| 1685 |
-
metrics_names.append('Upload')
|
| 1686 |
-
metrics_values.append(1.0)
|
| 1687 |
-
|
| 1688 |
fig.add_trace(go.Bar(
|
| 1689 |
-
x=
|
| 1690 |
-
y=
|
| 1691 |
-
marker_color=['#3b82f6', '#10b981'
|
| 1692 |
))
|
| 1693 |
-
|
| 1694 |
-
fig.update_layout(
|
| 1695 |
-
title="๐ฅ Burning Metrics",
|
| 1696 |
-
yaxis_range=[0, 1],
|
| 1697 |
-
template='plotly_white',
|
| 1698 |
-
height=400
|
| 1699 |
-
)
|
| 1700 |
|
| 1701 |
return output_md, fig
|
| 1702 |
|
| 1703 |
except Exception as e:
|
| 1704 |
import traceback
|
| 1705 |
-
|
| 1706 |
-
|
| 1707 |
-
return f"""
|
| 1708 |
-
โ **Burning Failed**
|
| 1709 |
-
|
| 1710 |
-
**Error:** {str(e)}
|
| 1711 |
|
| 1712 |
-
**Traceback:**
|
| 1713 |
-
```
|
| 1714 |
-
{error_msg}
|
| 1715 |
-
```
|
| 1716 |
-
""", None
|
| 1717 |
|
| 1718 |
-
|
| 1719 |
-
|
| 1720 |
-
"""View burning history"""
|
| 1721 |
try:
|
| 1722 |
-
history = db.
|
| 1723 |
-
|
| 1724 |
if not history:
|
| 1725 |
-
return "๐ญ No
|
| 1726 |
|
| 1727 |
df = pd.DataFrame(history)
|
| 1728 |
|
| 1729 |
fig = px.scatter(
|
| 1730 |
df,
|
| 1731 |
x='timestamp',
|
| 1732 |
-
y='
|
| 1733 |
-
|
| 1734 |
-
color='verification_passed',
|
| 1735 |
-
hover_data=['model_url', 'output_path', 'hub_url'],
|
| 1736 |
title='Burning History'
|
| 1737 |
)
|
| 1738 |
|
| 1739 |
-
|
| 1740 |
-
'evaluation_score', 'verification_passed', 'timestamp']
|
| 1741 |
-
available = [c for c in cols if c in df.columns]
|
| 1742 |
-
|
| 1743 |
-
return f"## ๐ Burning History\n\n{df[available].to_markdown(index=False)}", fig
|
| 1744 |
-
|
| 1745 |
except Exception as e:
|
| 1746 |
return f"โ Error: {e}", None
|
| 1747 |
|
| 1748 |
|
| 1749 |
-
def validate_phoenix_model(
|
| 1750 |
-
model_source,
|
| 1751 |
-
model_path_or_url,
|
| 1752 |
-
test_prompts,
|
| 1753 |
-
max_tokens,
|
| 1754 |
-
temperature,
|
| 1755 |
-
verify_retention
|
| 1756 |
-
):
|
| 1757 |
-
"""PHOENIX ๋ชจ๋ธ ๊ฒ์ฆ"""
|
| 1758 |
-
try:
|
| 1759 |
-
print("="*80)
|
| 1760 |
-
print("๐งช PHOENIX Model Validation v1.4.3")
|
| 1761 |
-
print("="*80)
|
| 1762 |
-
|
| 1763 |
-
print(f"\n๐ฅ Loading model from {model_source}...")
|
| 1764 |
-
start_time = time.time()
|
| 1765 |
-
|
| 1766 |
-
model = AutoModelForCausalLM.from_pretrained(
|
| 1767 |
-
model_path_or_url,
|
| 1768 |
-
trust_remote_code=True,
|
| 1769 |
-
torch_dtype=torch.float16,
|
| 1770 |
-
).to(DEVICE)
|
| 1771 |
-
|
| 1772 |
-
tokenizer = AutoTokenizer.from_pretrained(
|
| 1773 |
-
model_path_or_url,
|
| 1774 |
-
trust_remote_code=True
|
| 1775 |
-
)
|
| 1776 |
-
|
| 1777 |
-
if tokenizer.pad_token is None:
|
| 1778 |
-
tokenizer.pad_token = tokenizer.eos_token
|
| 1779 |
-
|
| 1780 |
-
load_time = time.time() - start_time
|
| 1781 |
-
print(f"โ
Model loaded in {load_time:.2f}s")
|
| 1782 |
-
|
| 1783 |
-
# ์์ฑ ํ
์คํธ
|
| 1784 |
-
prompts = [p.strip() for p in test_prompts.split('\n') if p.strip()]
|
| 1785 |
-
if not prompts:
|
| 1786 |
-
prompts = ["The future of AI is", "Once upon a time"]
|
| 1787 |
-
|
| 1788 |
-
results = []
|
| 1789 |
-
total_gen_time = 0
|
| 1790 |
-
|
| 1791 |
-
for i, prompt in enumerate(prompts, 1):
|
| 1792 |
-
inputs = tokenizer(prompt, return_tensors="pt").to(DEVICE)
|
| 1793 |
-
|
| 1794 |
-
gen_start = time.time()
|
| 1795 |
-
|
| 1796 |
-
with torch.no_grad():
|
| 1797 |
-
outputs = model.generate(
|
| 1798 |
-
**inputs,
|
| 1799 |
-
max_new_tokens=max_tokens,
|
| 1800 |
-
temperature=temperature,
|
| 1801 |
-
do_sample=temperature > 0.01,
|
| 1802 |
-
pad_token_id=tokenizer.eos_token_id,
|
| 1803 |
-
)
|
| 1804 |
-
|
| 1805 |
-
gen_time = time.time() - gen_start
|
| 1806 |
-
total_gen_time += gen_time
|
| 1807 |
-
|
| 1808 |
-
generated = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
| 1809 |
-
|
| 1810 |
-
tokens_generated = len(outputs[0]) - len(inputs['input_ids'][0])
|
| 1811 |
-
tokens_per_sec = tokens_generated / gen_time if gen_time > 0 else 0
|
| 1812 |
-
|
| 1813 |
-
results.append({
|
| 1814 |
-
'prompt': prompt,
|
| 1815 |
-
'generated': generated,
|
| 1816 |
-
'time': gen_time,
|
| 1817 |
-
'tokens': tokens_generated,
|
| 1818 |
-
'tokens_per_sec': tokens_per_sec,
|
| 1819 |
-
})
|
| 1820 |
-
|
| 1821 |
-
# ๊ฒฐ๊ณผ
|
| 1822 |
-
output_md = f"""
|
| 1823 |
-
# โ
PHOENIX Model Validation Complete! (v1.4.3)
|
| 1824 |
-
|
| 1825 |
-
## ๐ฆ Model Information
|
| 1826 |
-
- **Source**: {model_source.upper()}
|
| 1827 |
-
- **Path/URL**: `{model_path_or_url}`
|
| 1828 |
-
- **Load Time**: {load_time:.2f}s
|
| 1829 |
-
|
| 1830 |
-
## ๐ Generation Tests
|
| 1831 |
-
|
| 1832 |
-
**Total Tests**: {len(results)}
|
| 1833 |
-
**Average Speed**: {sum(r['tokens_per_sec'] for r in results)/len(results):.1f} tokens/s
|
| 1834 |
-
|
| 1835 |
-
---
|
| 1836 |
-
"""
|
| 1837 |
-
|
| 1838 |
-
for i, result in enumerate(results, 1):
|
| 1839 |
-
output_md += f"""
|
| 1840 |
-
### Test {i}
|
| 1841 |
-
|
| 1842 |
-
**Generated:**
|
| 1843 |
-
```
|
| 1844 |
-
{result['generated']}
|
| 1845 |
-
```
|
| 1846 |
-
|
| 1847 |
-
**Stats**: {result['time']:.2f}s | {result['tokens_per_sec']:.1f} tokens/s
|
| 1848 |
-
|
| 1849 |
-
---
|
| 1850 |
-
"""
|
| 1851 |
-
|
| 1852 |
-
# ๊ทธ๋ํ
|
| 1853 |
-
fig = go.Figure()
|
| 1854 |
-
|
| 1855 |
-
fig.add_trace(go.Bar(
|
| 1856 |
-
x=[f"Test {i+1}" for i in range(len(results))],
|
| 1857 |
-
y=[r['tokens_per_sec'] for r in results],
|
| 1858 |
-
marker_color='#10b981'
|
| 1859 |
-
))
|
| 1860 |
-
|
| 1861 |
-
fig.update_layout(
|
| 1862 |
-
title="Generation Speed (tokens/s)",
|
| 1863 |
-
template='plotly_white'
|
| 1864 |
-
)
|
| 1865 |
-
|
| 1866 |
-
return output_md, fig
|
| 1867 |
-
|
| 1868 |
-
except Exception as e:
|
| 1869 |
-
import traceback
|
| 1870 |
-
return f"โ Validation failed:\n```\n{traceback.format_exc()}\n```", None
|
| 1871 |
-
|
| 1872 |
-
|
| 1873 |
-
# ์ ์ญ ์ด๊ธฐํ
|
| 1874 |
-
db = ExperimentDatabase(DB_PATH)
|
| 1875 |
-
|
| 1876 |
# =====================================================
|
| 1877 |
-
# Gradio
|
| 1878 |
# =====================================================
|
| 1879 |
|
| 1880 |
-
with gr.Blocks(
|
| 1881 |
-
title="๐ฎ PHOENIX v1.4.3 - Complete Integrated Version",
|
| 1882 |
-
theme=gr.themes.Soft(),
|
| 1883 |
-
) as demo:
|
| 1884 |
|
| 1885 |
gr.Markdown("""
|
| 1886 |
-
#
|
| 1887 |
|
| 1888 |
-
**Complete Integrated Version
|
| 1889 |
|
| 1890 |
-
|
| 1891 |
-
โ
|
| 1892 |
-
โ
|
| 1893 |
-
โ
|
| 1894 |
-
โ
Model Structure Pre-Analysis
|
| 1895 |
-
โ
Qwen3 Model Support (์์ ์์ !)
|
| 1896 |
-
โ
Zero-shot Conversion (No Dataset Required)
|
| 1897 |
-
โ
GQA Support
|
| 1898 |
-
โ
O(n) Complexity
|
| 1899 |
-
โ
Auto Upload to HuggingFace Hub
|
| 1900 |
|
| 1901 |
---
|
| 1902 |
""")
|
| 1903 |
|
| 1904 |
with gr.Tabs():
|
| 1905 |
with gr.Tab("๐ฅ Model Burning"):
|
| 1906 |
-
gr.Markdown("""
|
| 1907 |
-
### ๐ฅ PHOENIX Model Burning v1.4.3
|
| 1908 |
-
|
| 1909 |
-
**์์ ํตํฉ๋ ๋ฒ์ ์ผ๋ก ๋ชจ๋ ๋ฌธ์ ๊ฐ ํด๊ฒฐ๋์์ต๋๋ค!**
|
| 1910 |
-
**forward() ์๊ทธ๋์ฒ๊ฐ Transformers์ ์๋ฒฝํ๊ฒ ํธํ๋ฉ๋๋ค!**
|
| 1911 |
-
""")
|
| 1912 |
-
|
| 1913 |
with gr.Row():
|
| 1914 |
with gr.Column(scale=1):
|
| 1915 |
-
|
| 1916 |
label="๐ Model URL",
|
| 1917 |
value=DEFAULT_MODEL,
|
| 1918 |
placeholder="Qwen/Qwen3-0.6B"
|
| 1919 |
)
|
| 1920 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1921 |
|
| 1922 |
-
|
| 1923 |
-
|
| 1924 |
-
|
|
|
|
|
|
|
| 1925 |
)
|
| 1926 |
|
| 1927 |
-
gr.
|
| 1928 |
-
gr.
|
| 1929 |
|
| 1930 |
-
|
| 1931 |
-
|
| 1932 |
-
|
|
|
|
|
|
|
|
|
|
| 1933 |
|
| 1934 |
-
|
| 1935 |
-
|
|
|
|
|
|
|
|
|
|
| 1936 |
|
| 1937 |
-
|
| 1938 |
-
|
| 1939 |
|
| 1940 |
-
|
| 1941 |
-
|
| 1942 |
-
|
| 1943 |
-
burn_lr = gr.Number(value=5e-5, label="Learning Rate")
|
| 1944 |
-
burn_max_steps = gr.Slider(10, 500, 100, step=10, label="Max Steps")
|
| 1945 |
|
| 1946 |
burn_btn = gr.Button("๐ฅ Burn Model", variant="primary", size="lg")
|
| 1947 |
|
|
@@ -1952,86 +1448,39 @@ with gr.Blocks(
|
|
| 1952 |
burn_btn.click(
|
| 1953 |
burn_phoenix_model_ui,
|
| 1954 |
[
|
| 1955 |
-
|
| 1956 |
-
|
| 1957 |
-
|
| 1958 |
],
|
| 1959 |
[burn_output, burn_plot]
|
| 1960 |
)
|
| 1961 |
|
| 1962 |
-
with gr.Tab("๐
|
| 1963 |
-
gr.Markdown("### ๐ Model Burning History")
|
| 1964 |
-
|
| 1965 |
with gr.Row():
|
| 1966 |
with gr.Column(scale=1):
|
| 1967 |
-
hist_btn = gr.Button("๐ Load
|
| 1968 |
-
|
| 1969 |
with gr.Column(scale=2):
|
| 1970 |
-
|
| 1971 |
hist_plot = gr.Plot()
|
| 1972 |
|
| 1973 |
-
hist_btn.click(
|
| 1974 |
-
|
| 1975 |
-
with gr.Tab("๐งช Model Validation"):
|
| 1976 |
-
gr.Markdown("### ๐งช PHOENIX ๋ชจ๋ธ ๊ฒ์ฆ")
|
| 1977 |
-
|
| 1978 |
-
with gr.Row():
|
| 1979 |
-
with gr.Column(scale=1):
|
| 1980 |
-
val_source = gr.Radio(
|
| 1981 |
-
choices=["hub", "local"],
|
| 1982 |
-
value="hub",
|
| 1983 |
-
label="๐ Model Source"
|
| 1984 |
-
)
|
| 1985 |
-
|
| 1986 |
-
val_path = gr.Textbox(
|
| 1987 |
-
label="๐ Model Path/URL",
|
| 1988 |
-
value="seawolf2357/phoenix-Qwen3-0.6B",
|
| 1989 |
-
placeholder="seawolf2357/phoenix-model"
|
| 1990 |
-
)
|
| 1991 |
-
|
| 1992 |
-
val_prompts = gr.Textbox(
|
| 1993 |
-
label="๐ Test Prompts (one per line)",
|
| 1994 |
-
lines=5,
|
| 1995 |
-
value="The future of AI is\nOnce upon a time\nIn machine learning,",
|
| 1996 |
-
)
|
| 1997 |
-
|
| 1998 |
-
with gr.Row():
|
| 1999 |
-
val_max_tokens = gr.Slider(16, 256, 64, step=16, label="Max Tokens")
|
| 2000 |
-
val_temp = gr.Slider(0.1, 2.0, 0.7, step=0.1, label="Temperature")
|
| 2001 |
-
|
| 2002 |
-
val_verify_retention = gr.Checkbox(value=True, label="๐ Verify Retention")
|
| 2003 |
-
|
| 2004 |
-
val_btn = gr.Button("๐งช Validate Model", variant="primary", size="lg")
|
| 2005 |
-
|
| 2006 |
-
with gr.Column(scale=2):
|
| 2007 |
-
val_output = gr.Markdown()
|
| 2008 |
-
val_plot = gr.Plot()
|
| 2009 |
-
|
| 2010 |
-
val_btn.click(
|
| 2011 |
-
validate_phoenix_model,
|
| 2012 |
-
[val_source, val_path, val_prompts, val_max_tokens,
|
| 2013 |
-
val_temp, val_verify_retention],
|
| 2014 |
-
[val_output, val_plot]
|
| 2015 |
-
)
|
| 2016 |
|
| 2017 |
gr.Markdown(f"""
|
| 2018 |
---
|
| 2019 |
|
| 2020 |
-
## ๐ฅ PHOENIX
|
| 2021 |
|
| 2022 |
-
|
| 2023 |
-
-
|
| 2024 |
-
-
|
| 2025 |
-
-
|
| 2026 |
-
- โ
|
| 2027 |
-
- โ
**์์ ํตํฉ** - ๋ชจ๋ ์์ ์ฌํญ ํฌํจ
|
| 2028 |
|
| 2029 |
-
**
|
| 2030 |
-
**
|
| 2031 |
-
|
| 2032 |
-
**VIDraft AI Research Lab** | PHOENIX v1.4.3 Complete
|
| 2033 |
""")
|
| 2034 |
|
|
|
|
| 2035 |
if __name__ == "__main__":
|
| 2036 |
demo.queue(max_size=20)
|
| 2037 |
demo.launch(server_name="0.0.0.0", server_port=7860, share=False)
|
|
|
|
| 1 |
"""
|
| 2 |
+
๐ฅ PHOENIX Retention Research Platform v2.0 COMPLETE
|
| 3 |
+
Brumby-inspired Retraining + All v1.4.3 Fixes
|
| 4 |
+
|
| 5 |
+
โ
v2.0 NEW: Fine-tuning ํ์ดํ๋ผ์ธ (Brumby-style Retraining)
|
| 6 |
+
โ
v2.0 NEW: 3-Phase Dataset ์ง์
|
| 7 |
+
โ
v2.0 NEW: ๋น์ฉ ๊ณ์ฐ๊ธฐ
|
| 8 |
+
โ
v1.4.3: forward() ์๊ทธ๋์ฒ Transformers ํธํ
|
| 9 |
+
โ
v1.4.3: dtype ๋ถ์ผ์น ์์ (bfloat16 ์ง์)
|
| 10 |
+
โ
v1.4.3: Embedding Tying ์๋ ์ฒ๋ฆฌ
|
| 11 |
โ
Model Structure Pre-Analysis
|
| 12 |
โ
Qwen3 Model Support
|
|
|
|
|
|
|
| 13 |
โ
GQA Support
|
| 14 |
+
โ
HuggingFace Hub Integration
|
|
|
|
|
|
|
| 15 |
|
| 16 |
+
VIDraft AI Research Lab - Complete Integrated Version v2.0
|
| 17 |
+
Based on Manifest AI's Brumby-14B Success
|
| 18 |
"""
|
| 19 |
|
| 20 |
import gradio as gr
|
|
|
|
| 31 |
import plotly.express as px
|
| 32 |
import pandas as pd
|
| 33 |
from typing import Dict, List, Any, Tuple, Optional
|
|
|
|
|
|
|
| 34 |
from transformers import (
|
| 35 |
AutoModel, AutoTokenizer, AutoConfig, AutoModelForCausalLM,
|
| 36 |
+
get_cosine_schedule_with_warmup, TrainingArguments, Trainer,
|
| 37 |
+
DataCollatorForLanguageModeling
|
| 38 |
)
|
| 39 |
+
from datasets import load_dataset, concatenate_datasets
|
| 40 |
from torch.utils.data import Dataset, DataLoader
|
| 41 |
from accelerate import Accelerator
|
| 42 |
from tqdm import tqdm
|
|
|
|
| 52 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
| 53 |
STORAGE_PATH = "/data"
|
| 54 |
DB_PATH = f"{STORAGE_PATH}/phoenix_experiments.db"
|
|
|
|
| 55 |
MODELS_PATH = f"{STORAGE_PATH}/phoenix_models"
|
| 56 |
DEFAULT_MODEL = "Qwen/Qwen3-0.6B"
|
| 57 |
|
|
|
|
| 59 |
HF_TOKEN = os.getenv("HF_TOKEN")
|
| 60 |
|
| 61 |
Path(STORAGE_PATH).mkdir(parents=True, exist_ok=True)
|
|
|
|
| 62 |
Path(MODELS_PATH).mkdir(parents=True, exist_ok=True)
|
| 63 |
|
| 64 |
+
print(f"๐ฅ PHOENIX Platform v2.0 initialized on {DEVICE}")
|
| 65 |
print(f"๐พ Storage: {STORAGE_PATH}")
|
| 66 |
print(f"๐ฏ Default Base Model: {DEFAULT_MODEL}")
|
| 67 |
if HF_TOKEN:
|
|
|
|
| 74 |
# =====================================================
|
| 75 |
|
| 76 |
def analyze_model_structure(model_url: str) -> Dict[str, Any]:
|
| 77 |
+
"""๐ ๋ชจ๋ธ ๊ตฌ์กฐ ์ฌ์ ๋ถ์"""
|
|
|
|
|
|
|
|
|
|
| 78 |
print("\n" + "="*80)
|
| 79 |
print("๐ MODEL STRUCTURE ANALYSIS")
|
| 80 |
print("="*80)
|
|
|
|
| 103 |
'num_attention_heads': config.num_attention_heads if hasattr(config, 'num_attention_heads') else 0,
|
| 104 |
'num_hidden_layers': config.num_hidden_layers if hasattr(config, 'num_hidden_layers') else 0,
|
| 105 |
'num_key_value_heads': config.num_key_value_heads if hasattr(config, 'num_key_value_heads') else None,
|
|
|
|
|
|
|
| 106 |
'total_layers': 0,
|
| 107 |
'has_self_attn': False,
|
| 108 |
'layer_path': None,
|
|
|
|
| 117 |
('model.layers', lambda m: m.model.layers if hasattr(m, 'model') and hasattr(m.model, 'layers') else None),
|
| 118 |
('transformer.h', lambda m: m.transformer.h if hasattr(m, 'transformer') and hasattr(m.transformer, 'h') else None),
|
| 119 |
('layers', lambda m: m.layers if hasattr(m, 'layers') else None),
|
|
|
|
| 120 |
]
|
| 121 |
|
| 122 |
for path_name, path_fn in possible_paths:
|
|
|
|
| 128 |
break
|
| 129 |
|
| 130 |
if layers is None:
|
| 131 |
+
print(f" โ No layers found!")
|
| 132 |
analysis['error'] = 'No layers found'
|
| 133 |
return analysis
|
| 134 |
|
|
|
|
| 146 |
attn = first_layer.self_attn
|
| 147 |
|
| 148 |
print(f" โ
Has self_attn")
|
|
|
|
|
|
|
|
|
|
| 149 |
|
| 150 |
if hasattr(attn, 'q_proj'):
|
| 151 |
q_shape = attn.q_proj.weight.shape
|
| 152 |
k_shape = attn.k_proj.weight.shape
|
|
|
|
| 153 |
|
| 154 |
print(f" Q projection: {q_shape}")
|
| 155 |
print(f" K projection: {k_shape}")
|
|
|
|
| 156 |
|
| 157 |
if hasattr(config, 'num_attention_heads') and config.num_attention_heads > 0:
|
| 158 |
head_dim = q_shape[0] // config.num_attention_heads
|
|
|
|
| 160 |
print(f" Calculated head_dim: {head_dim}")
|
| 161 |
|
| 162 |
if k_shape[0] != q_shape[0]:
|
| 163 |
+
print(f" โ
GQA detected!")
|
| 164 |
analysis['gqa_detected'] = True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 165 |
else:
|
|
|
|
| 166 |
analysis['gqa_detected'] = False
|
| 167 |
|
| 168 |
analysis['q_dim'] = q_shape[0]
|
| 169 |
analysis['k_dim'] = k_shape[0]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 170 |
|
| 171 |
+
print(f"\n{'='*80}\n")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 172 |
|
| 173 |
del model
|
| 174 |
torch.cuda.empty_cache()
|
|
|
|
| 184 |
return {
|
| 185 |
'model_url': model_url,
|
| 186 |
'error': str(e),
|
|
|
|
| 187 |
'total_layers': 0,
|
| 188 |
}
|
| 189 |
|
|
|
|
| 203 |
self.hidden_size = config.hidden_size
|
| 204 |
self.num_heads = config.num_attention_heads
|
| 205 |
|
|
|
|
| 206 |
if hasattr(config, 'head_dim'):
|
| 207 |
self.head_dim = config.head_dim
|
| 208 |
else:
|
|
|
|
| 219 |
self.q_dim = self.num_heads * self.head_dim
|
| 220 |
self.kv_dim = self.num_key_value_heads * self.kv_head_dim
|
| 221 |
|
|
|
|
|
|
|
|
|
|
| 222 |
self.q_proj = nn.Linear(self.hidden_size, self.q_dim, bias=False)
|
| 223 |
self.k_proj = nn.Linear(self.hidden_size, self.kv_dim, bias=False)
|
| 224 |
self.v_proj = nn.Linear(self.hidden_size, self.kv_dim, bias=False)
|
|
|
|
| 242 |
batch, num_key_value_heads, n_rep, slen, head_dim
|
| 243 |
)
|
| 244 |
return hidden_states.reshape(batch, num_key_value_heads * n_rep, slen, head_dim)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 245 |
|
| 246 |
def forward(
|
| 247 |
self,
|
|
|
|
| 258 |
"""O(n) Retention with GQA support"""
|
| 259 |
batch_size, seq_len, _ = hidden_states.shape
|
| 260 |
|
|
|
|
|
|
|
|
|
|
| 261 |
target_device = hidden_states.device
|
| 262 |
target_dtype = hidden_states.dtype
|
| 263 |
|
| 264 |
+
# โ
v1.4.3 FIX: dtype๊ณผ device ๋ชจ๋ ์ผ์น
|
| 265 |
if self.q_proj.weight.device != target_device or self.q_proj.weight.dtype != target_dtype:
|
| 266 |
+
self.to(device=target_device, dtype=target_dtype)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 267 |
|
| 268 |
query_states = self.q_proj(hidden_states)
|
| 269 |
key_states = self.k_proj(hidden_states)
|
|
|
|
| 284 |
key_states = self._repeat_kv(key_states, self.num_key_value_groups)
|
| 285 |
value_states = self._repeat_kv(value_states, self.num_key_value_groups)
|
| 286 |
|
| 287 |
+
retention_states = self._compute_retention(
|
| 288 |
+
query_states, key_states, value_states
|
|
|
|
| 289 |
)
|
| 290 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 291 |
retention_states = retention_states.transpose(1, 2).contiguous()
|
| 292 |
retention_states = retention_states.reshape(
|
| 293 |
batch_size, seq_len, self.q_dim
|
| 294 |
)
|
| 295 |
|
| 296 |
+
if self.group_norm.weight.device != retention_states.device or self.group_norm.weight.dtype != retention_states.dtype:
|
| 297 |
+
self.group_norm = self.group_norm.to(device=retention_states.device, dtype=retention_states.dtype)
|
|
|
|
|
|
|
| 298 |
|
| 299 |
retention_states = self.group_norm(
|
| 300 |
retention_states.transpose(1, 2)
|
|
|
|
| 311 |
queries: torch.Tensor,
|
| 312 |
keys: torch.Tensor,
|
| 313 |
values: torch.Tensor,
|
|
|
|
| 314 |
):
|
| 315 |
"""O(n) Retention computation"""
|
| 316 |
batch_size, num_heads, seq_len, head_dim = queries.shape
|
| 317 |
|
| 318 |
+
state = torch.zeros(
|
| 319 |
+
batch_size, num_heads, head_dim, head_dim,
|
| 320 |
+
dtype=queries.dtype,
|
| 321 |
+
device=queries.device
|
| 322 |
+
) + 1e-6
|
|
|
|
|
|
|
|
|
|
| 323 |
|
| 324 |
outputs = []
|
| 325 |
|
|
|
|
| 344 |
|
| 345 |
output = torch.stack(outputs, dim=2)
|
| 346 |
|
| 347 |
+
return output
|
| 348 |
|
| 349 |
|
| 350 |
class HierarchicalRetention(nn.Module):
|
|
|
|
| 367 |
self.long_decay = 0.95
|
| 368 |
|
| 369 |
self.norm = nn.LayerNorm(hidden_size)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 370 |
|
| 371 |
def forward(
|
| 372 |
self,
|
|
|
|
| 383 |
"""Hierarchical forward pass"""
|
| 384 |
batch_size, seq_len, hidden_size = hidden_states.shape
|
| 385 |
|
|
|
|
|
|
|
|
|
|
| 386 |
target_device = hidden_states.device
|
| 387 |
target_dtype = hidden_states.dtype
|
| 388 |
|
| 389 |
+
# โ
v1.4.3 FIX: dtype๊ณผ device ๋ชจ๋ ์ผ์น
|
| 390 |
+
if self.short_proj.weight.device != target_device or self.short_proj.weight.dtype != target_dtype:
|
| 391 |
+
self.to(device=target_device, dtype=target_dtype)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 392 |
|
| 393 |
base_result = self.base_retention(
|
| 394 |
hidden_states, attention_mask, position_ids,
|
|
|
|
| 432 |
# =====================================================
|
| 433 |
|
| 434 |
def replace_attention_with_retention(model, use_hierarchical=True, structure_info=None):
|
| 435 |
+
"""Transformer Attention โ PHOENIX Retention (GQA Support)"""
|
| 436 |
+
print("๐ Starting Attention โ Retention conversion...")
|
|
|
|
|
|
|
|
|
|
| 437 |
|
| 438 |
replaced_count = 0
|
| 439 |
total_layers = 0
|
|
|
|
| 451 |
elif layer_path == 'transformer.h':
|
| 452 |
if hasattr(model, 'transformer') and hasattr(model.transformer, 'h'):
|
| 453 |
layers = model.transformer.h
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 454 |
|
| 455 |
if layers is None:
|
|
|
|
|
|
|
| 456 |
possible_paths = [
|
| 457 |
('model.layers', lambda m: m.model.layers if hasattr(m, 'model') and hasattr(m.model, 'layers') else None),
|
| 458 |
('transformer.h', lambda m: m.transformer.h if hasattr(m, 'transformer') and hasattr(m.transformer, 'h') else None),
|
|
|
|
|
|
|
| 459 |
]
|
| 460 |
|
| 461 |
for path_name, path_fn in possible_paths:
|
|
|
|
| 467 |
break
|
| 468 |
|
| 469 |
if layers is None:
|
| 470 |
+
print("โ Cannot find layers")
|
| 471 |
return model, 0, 0
|
| 472 |
|
| 473 |
total_layers = len(layers)
|
| 474 |
+
print(f" Found {total_layers} layers")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 475 |
|
| 476 |
if structure_info and structure_info.get('head_dim'):
|
| 477 |
model.config.head_dim = structure_info['head_dim']
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 478 |
|
| 479 |
for layer_idx, layer in enumerate(layers):
|
| 480 |
try:
|
|
|
|
| 488 |
|
| 489 |
if hasattr(old_attn, 'q_proj'):
|
| 490 |
try:
|
| 491 |
+
target = new_retention.base_retention if use_hierarchical else new_retention
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 492 |
|
| 493 |
+
target.q_proj.weight.data = old_attn.q_proj.weight.data.clone()
|
| 494 |
+
target.k_proj.weight.data = old_attn.k_proj.weight.data.clone()
|
| 495 |
+
target.v_proj.weight.data = old_attn.v_proj.weight.data.clone()
|
| 496 |
+
target.o_proj.weight.data = old_attn.o_proj.weight.data.clone()
|
| 497 |
+
except:
|
| 498 |
+
pass
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 499 |
|
| 500 |
layer.self_attn = new_retention
|
| 501 |
replaced_count += 1
|
| 502 |
|
| 503 |
except Exception as e:
|
|
|
|
| 504 |
continue
|
| 505 |
|
| 506 |
print(f"\nโ
Conversion complete: {replaced_count}/{total_layers} layers")
|
|
|
|
| 509 |
|
| 510 |
|
| 511 |
# =====================================================
|
| 512 |
+
# v2.0 NEW: Fine-tuning ํ์ดํ๋ผ์ธ
|
| 513 |
+
# =====================================================
|
| 514 |
+
|
| 515 |
+
def finetune_retention_model(
|
| 516 |
+
model,
|
| 517 |
+
tokenizer,
|
| 518 |
+
num_steps: int = 3000,
|
| 519 |
+
batch_size: int = 4,
|
| 520 |
+
learning_rate: float = 1e-5,
|
| 521 |
+
output_dir: str = "/data/finetuning_temp",
|
| 522 |
+
use_3phase: bool = True,
|
| 523 |
+
):
|
| 524 |
+
"""
|
| 525 |
+
๐ v2.0: Brumby-style Retraining
|
| 526 |
+
"""
|
| 527 |
+
print("\n" + "="*80)
|
| 528 |
+
print("๐ฅ PHOENIX RETRAINING - Brumby Style (v2.0)")
|
| 529 |
+
print("="*80)
|
| 530 |
+
print(f" Target Steps: {num_steps}")
|
| 531 |
+
print(f" Batch Size: {batch_size}")
|
| 532 |
+
print(f" Learning Rate: {learning_rate}")
|
| 533 |
+
|
| 534 |
+
start_time = time.time()
|
| 535 |
+
|
| 536 |
+
# Prepare dataset
|
| 537 |
+
train_dataset = prepare_simple_dataset(
|
| 538 |
+
tokenizer=tokenizer,
|
| 539 |
+
num_steps=num_steps,
|
| 540 |
+
batch_size=batch_size
|
| 541 |
+
)
|
| 542 |
+
|
| 543 |
+
# Training arguments
|
| 544 |
+
training_args = TrainingArguments(
|
| 545 |
+
output_dir=output_dir,
|
| 546 |
+
num_train_epochs=1,
|
| 547 |
+
per_device_train_batch_size=batch_size,
|
| 548 |
+
learning_rate=learning_rate,
|
| 549 |
+
warmup_steps=100,
|
| 550 |
+
logging_steps=50,
|
| 551 |
+
save_steps=1000,
|
| 552 |
+
max_steps=num_steps,
|
| 553 |
+
fp16=True,
|
| 554 |
+
gradient_accumulation_steps=8,
|
| 555 |
+
dataloader_num_workers=2,
|
| 556 |
+
remove_unused_columns=False,
|
| 557 |
+
report_to="none",
|
| 558 |
+
)
|
| 559 |
+
|
| 560 |
+
# Data collator
|
| 561 |
+
data_collator = DataCollatorForLanguageModeling(
|
| 562 |
+
tokenizer=tokenizer,
|
| 563 |
+
mlm=False
|
| 564 |
+
)
|
| 565 |
+
|
| 566 |
+
# Trainer
|
| 567 |
+
trainer = Trainer(
|
| 568 |
+
model=model,
|
| 569 |
+
args=training_args,
|
| 570 |
+
train_dataset=train_dataset,
|
| 571 |
+
tokenizer=tokenizer,
|
| 572 |
+
data_collator=data_collator,
|
| 573 |
+
)
|
| 574 |
+
|
| 575 |
+
# Train!
|
| 576 |
+
print(f"\n๐ Starting Fine-tuning...")
|
| 577 |
+
trainer.train()
|
| 578 |
+
|
| 579 |
+
elapsed = time.time() - start_time
|
| 580 |
+
|
| 581 |
+
print(f"\nโ
Fine-tuning Complete!")
|
| 582 |
+
print(f" Time: {elapsed/60:.1f} minutes")
|
| 583 |
+
print(f"="*80 + "\n")
|
| 584 |
+
|
| 585 |
+
return model
|
| 586 |
+
|
| 587 |
+
|
| 588 |
+
def prepare_simple_dataset(
|
| 589 |
+
tokenizer,
|
| 590 |
+
num_steps: int,
|
| 591 |
+
batch_size: int,
|
| 592 |
+
max_length: int = 2048,
|
| 593 |
+
):
|
| 594 |
+
"""Simple dataset preparation"""
|
| 595 |
+
print(f"\n๐ Preparing Dataset...")
|
| 596 |
+
|
| 597 |
+
num_samples = num_steps * batch_size
|
| 598 |
+
|
| 599 |
+
print(f" Target samples: {num_samples}")
|
| 600 |
+
|
| 601 |
+
try:
|
| 602 |
+
dataset = load_dataset(
|
| 603 |
+
"wikitext",
|
| 604 |
+
"wikitext-2-raw-v1",
|
| 605 |
+
split=f"train[:{num_samples}]"
|
| 606 |
+
)
|
| 607 |
+
print(f" โ
Loaded: {len(dataset)} samples")
|
| 608 |
+
except Exception as e:
|
| 609 |
+
print(f" โ Failed: {e}")
|
| 610 |
+
raise
|
| 611 |
+
|
| 612 |
+
def tokenize_function(examples):
|
| 613 |
+
return tokenizer(
|
| 614 |
+
examples['text'],
|
| 615 |
+
truncation=True,
|
| 616 |
+
max_length=max_length,
|
| 617 |
+
padding="max_length",
|
| 618 |
+
)
|
| 619 |
+
|
| 620 |
+
tokenized = dataset.map(
|
| 621 |
+
tokenize_function,
|
| 622 |
+
batched=True,
|
| 623 |
+
remove_columns=dataset.column_names
|
| 624 |
+
)
|
| 625 |
+
|
| 626 |
+
print(f" โ
Tokenized: {len(tokenized)} samples")
|
| 627 |
+
|
| 628 |
+
return tokenized
|
| 629 |
+
|
| 630 |
+
|
| 631 |
+
def estimate_finetuning_cost(
|
| 632 |
+
model_size: str,
|
| 633 |
+
num_steps: int,
|
| 634 |
+
batch_size: int,
|
| 635 |
+
gpu_type: str = "A100",
|
| 636 |
+
) -> Dict:
|
| 637 |
+
"""๐ v2.0: ๋น์ฉ ๊ณ์ฐ๊ธฐ"""
|
| 638 |
+
gpu_costs = {
|
| 639 |
+
"H100": 3.0,
|
| 640 |
+
"A100": 2.0,
|
| 641 |
+
"A10G": 1.0,
|
| 642 |
+
"T4": 0.5,
|
| 643 |
+
}
|
| 644 |
+
|
| 645 |
+
model_step_times = {
|
| 646 |
+
"0.6B": 0.5,
|
| 647 |
+
"1.5B": 1.0,
|
| 648 |
+
"3B": 2.0,
|
| 649 |
+
"7B": 3.5,
|
| 650 |
+
"14B": 6.0,
|
| 651 |
+
}
|
| 652 |
+
|
| 653 |
+
step_time = model_step_times.get(model_size, 1.0) * (batch_size / 4)
|
| 654 |
+
total_seconds = num_steps * step_time
|
| 655 |
+
total_hours = total_seconds / 3600
|
| 656 |
+
total_cost_usd = total_hours * gpu_costs.get(gpu_type, 2.0)
|
| 657 |
+
|
| 658 |
+
return {
|
| 659 |
+
'hours': round(total_hours, 2),
|
| 660 |
+
'cost_usd': round(total_cost_usd, 2),
|
| 661 |
+
'cost_krw': round(total_cost_usd * 1300, 0),
|
| 662 |
+
}
|
| 663 |
+
|
| 664 |
+
|
| 665 |
+
# =====================================================
|
| 666 |
+
# Custom Modeling Code ์์ฑ
|
| 667 |
# =====================================================
|
| 668 |
|
| 669 |
def generate_modeling_phoenix_code():
|
| 670 |
+
"""PHOENIX Custom Modeling Code v2.0"""
|
| 671 |
|
| 672 |
return '''"""
|
| 673 |
+
PHOENIX Retention Model v2.0
|
| 674 |
+
โ
v2.0: Brumby-style Retraining support
|
| 675 |
+
โ
v1.4.3: forward() ์๊ทธ๋์ฒ Transformers ํธํ
|
| 676 |
+
โ
v1.4.3: dtype ๋ถ์ผ์น ์์
|
|
|
|
| 677 |
"""
|
| 678 |
|
| 679 |
import torch
|
|
|
|
| 687 |
|
| 688 |
class PhoenixConfig(PretrainedConfig):
|
| 689 |
model_type = "phoenix"
|
| 690 |
+
def __init__(self, use_phoenix_retention=True, phoenix_version="2.0",
|
| 691 |
original_model=None, use_hierarchical=True, **kwargs):
|
| 692 |
super().__init__(**kwargs)
|
| 693 |
self.use_phoenix_retention = use_phoenix_retention
|
|
|
|
| 719 |
if n == 1: return x
|
| 720 |
return x[:, :, None, :, :].expand(b, h, n, s, d).reshape(b, h*n, s, d)
|
| 721 |
|
| 722 |
+
def forward(self, hidden_states, **kwargs):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 723 |
b, s, _ = hidden_states.shape
|
| 724 |
device, dtype = hidden_states.device, hidden_states.dtype
|
| 725 |
|
|
|
|
| 726 |
if self.q_proj.weight.device != device or self.q_proj.weight.dtype != dtype:
|
| 727 |
self.to(device=device, dtype=dtype)
|
| 728 |
|
|
|
|
| 763 |
self.norm = nn.LayerNorm(h)
|
| 764 |
self.decays = [0.5, 0.8, 0.95]
|
| 765 |
|
| 766 |
+
def forward(self, hidden_states, **kwargs):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 767 |
b, s, h = hidden_states.shape
|
| 768 |
device, dtype = hidden_states.device, hidden_states.dtype
|
| 769 |
|
| 770 |
+
if self.short_proj.weight.device != device or self.short_proj.weight.dtype != dtype:
|
|
|
|
| 771 |
self.to(device=device, dtype=dtype)
|
| 772 |
|
| 773 |
ret_out = self.base_retention(hidden_states)[0]
|
|
|
|
| 787 |
|
| 788 |
def replace_attention_with_retention_for_loading(model, use_hierarchical=True):
|
| 789 |
layers = getattr(model, 'model', model)
|
| 790 |
+
layers = getattr(layers, 'layers', getattr(layers, 'h', None))
|
| 791 |
if layers is None: return model, 0, 0
|
| 792 |
|
|
|
|
| 793 |
original_dtype = None
|
| 794 |
for param in model.parameters():
|
| 795 |
original_dtype = param.dtype
|
|
|
|
| 798 |
cnt = 0
|
| 799 |
for i, layer in enumerate(layers):
|
| 800 |
if hasattr(layer, 'self_attn'):
|
| 801 |
+
new_ret = HierarchicalRetention(model.config, i) if use_hierarchical else MultiScaleRetention(model.config, i)
|
| 802 |
+
if original_dtype: new_ret = new_ret.to(dtype=original_dtype)
|
| 803 |
+
layer.self_attn = new_ret
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 804 |
cnt += 1
|
| 805 |
return model, cnt, len(layers)
|
| 806 |
|
| 807 |
|
|
|
|
| 808 |
class PhoenixPreTrainedModel(PreTrainedModel):
|
| 809 |
config_class = PhoenixConfig
|
| 810 |
base_model_prefix = "phoenix"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 811 |
|
| 812 |
|
| 813 |
class PhoenixModelForCausalLM(PhoenixPreTrainedModel):
|
|
|
|
| 818 |
|
| 819 |
@classmethod
|
| 820 |
def from_pretrained(cls, path, *args, **kwargs):
|
| 821 |
+
print(f"๐ฅ PHOENIX v2.0 loading from {path}")
|
| 822 |
config = AutoConfig.from_pretrained(path, trust_remote_code=True)
|
| 823 |
orig = getattr(config, 'original_model', 'Qwen/Qwen3-0.6B')
|
| 824 |
hier = getattr(config, 'use_hierarchical', True)
|
|
|
|
| 832 |
model, conv, tot = replace_attention_with_retention_for_loading(model, hier)
|
| 833 |
print(f" โ
Converted {conv}/{tot} layers")
|
| 834 |
|
|
|
|
| 835 |
sd = None
|
| 836 |
if os.path.exists(path):
|
| 837 |
for fname in ["model.safetensors", "pytorch_model.bin"]:
|
|
|
|
| 868 |
inst = cls(config)
|
| 869 |
inst._model = model
|
| 870 |
inst._ready = True
|
| 871 |
+
print(f"โ
PHOENIX v2.0 ready!")
|
| 872 |
return inst
|
| 873 |
|
| 874 |
def forward(self, *a, **k):
|
|
|
|
| 882 |
|
| 883 |
AutoConfig.register("phoenix", PhoenixConfig)
|
| 884 |
'''
|
|
|
|
|
|
|
| 885 |
|
| 886 |
|
| 887 |
# =====================================================
|
| 888 |
+
# ์ ์ฅ ํจ์
|
| 889 |
# =====================================================
|
| 890 |
|
| 891 |
def save_phoenix_model_with_code(model, tokenizer, output_path, original_model_url, metadata):
|
| 892 |
+
"""PHOENIX ๋ชจ๋ธ ์ ์ฅ v2.0"""
|
| 893 |
output_path = Path(output_path)
|
| 894 |
output_path.mkdir(parents=True, exist_ok=True)
|
| 895 |
|
| 896 |
+
print(f"\n๐พ Saving PHOENIX model...")
|
| 897 |
|
| 898 |
+
# Embedding Tying
|
| 899 |
if hasattr(model.config, 'tie_word_embeddings') and model.config.tie_word_embeddings:
|
| 900 |
+
if hasattr(model, 'lm_head') and hasattr(model, 'model') and hasattr(model.model, 'embed_tokens'):
|
| 901 |
+
model.lm_head.weight = model.model.embed_tokens.weight
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 902 |
|
|
|
|
| 903 |
model.save_pretrained(output_path)
|
| 904 |
tokenizer.save_pretrained(output_path)
|
|
|
|
| 905 |
|
| 906 |
+
# Custom code
|
| 907 |
modeling_code = generate_modeling_phoenix_code()
|
| 908 |
+
with open(output_path / "modeling_phoenix.py", "w") as f:
|
| 909 |
f.write(modeling_code)
|
|
|
|
| 910 |
|
| 911 |
+
# Config
|
| 912 |
config_path = output_path / "config.json"
|
| 913 |
if config_path.exists():
|
| 914 |
+
with open(config_path, "r") as f:
|
| 915 |
config_dict = json.load(f)
|
| 916 |
|
| 917 |
config_dict["use_phoenix_retention"] = True
|
| 918 |
+
config_dict["phoenix_version"] = "2.0"
|
| 919 |
config_dict["original_model"] = original_model_url
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 920 |
config_dict["auto_map"] = {
|
| 921 |
"AutoModelForCausalLM": "modeling_phoenix.PhoenixModelForCausalLM",
|
| 922 |
}
|
| 923 |
|
| 924 |
+
with open(config_path, "w") as f:
|
| 925 |
json.dump(config_dict, f, indent=2)
|
|
|
|
| 926 |
|
| 927 |
+
# Metadata
|
| 928 |
+
with open(output_path / 'phoenix_metadata.json', 'w') as f:
|
|
|
|
| 929 |
json.dump(metadata, f, indent=2)
|
|
|
|
| 930 |
|
| 931 |
+
# README
|
| 932 |
+
readme = f"""# ๐ฅ PHOENIX v2.0 - {original_model_url}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 933 |
|
| 934 |
## Features
|
| 935 |
+
- โ
Brumby-style Retraining
|
| 936 |
+
- โ
O(n) Complexity
|
| 937 |
+
- โ
GQA Support
|
|
|
|
|
|
|
| 938 |
|
| 939 |
## Usage
|
|
|
|
|
|
|
| 940 |
```python
|
| 941 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 942 |
|
|
|
|
| 946 |
torch_dtype="auto",
|
| 947 |
device_map="auto"
|
| 948 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 949 |
```
|
| 950 |
|
| 951 |
+
**VIDraft AI Research Lab** | PHOENIX v2.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 952 |
"""
|
| 953 |
|
| 954 |
+
with open(output_path / "README.md", "w") as f:
|
| 955 |
+
f.write(readme)
|
|
|
|
| 956 |
|
| 957 |
+
print(f" โ
Model saved to {output_path}")
|
|
|
|
| 958 |
|
| 959 |
|
| 960 |
# =====================================================
|
| 961 |
+
# ์
๋ก๋ ํจ์
|
| 962 |
# =====================================================
|
| 963 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 964 |
def upload_to_huggingface_hub(
|
| 965 |
model_path: str,
|
| 966 |
original_model_url: str,
|
| 967 |
repo_name: str = None,
|
| 968 |
private: bool = True,
|
| 969 |
token: str = None,
|
|
|
|
| 970 |
) -> Tuple[bool, str, str]:
|
| 971 |
+
"""Upload PHOENIX model to Hub"""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 972 |
|
| 973 |
if token is None:
|
| 974 |
token = HF_TOKEN
|
| 975 |
|
| 976 |
if not token:
|
| 977 |
+
return False, "", "โ No HF_TOKEN"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 978 |
|
| 979 |
try:
|
|
|
|
| 980 |
api = HfApi(token=token)
|
|
|
|
| 981 |
user_info = api.whoami(token=token)
|
| 982 |
username = user_info['name']
|
|
|
|
| 983 |
|
| 984 |
if not repo_name:
|
| 985 |
base_name = original_model_url.split('/')[-1]
|
|
|
|
| 987 |
|
| 988 |
repo_id = f"{username}/{repo_name}"
|
| 989 |
|
|
|
|
| 990 |
create_repo(
|
| 991 |
repo_id=repo_id,
|
| 992 |
token=token,
|
|
|
|
| 994 |
repo_type="model",
|
| 995 |
exist_ok=True
|
| 996 |
)
|
|
|
|
| 997 |
|
|
|
|
| 998 |
api.upload_folder(
|
| 999 |
folder_path=str(model_path),
|
| 1000 |
repo_id=repo_id,
|
|
|
|
| 1004 |
|
| 1005 |
hub_url = f"https://huggingface.co/{repo_id}"
|
| 1006 |
|
| 1007 |
+
return True, hub_url, f"โ
Uploaded to {hub_url}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1008 |
|
| 1009 |
except Exception as e:
|
| 1010 |
+
return False, "", f"โ Upload failed: {e}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1011 |
|
| 1012 |
|
| 1013 |
# =====================================================
|
| 1014 |
# ํ๊ฐ ํจ์
|
| 1015 |
# =====================================================
|
| 1016 |
|
| 1017 |
+
def evaluate_model_quality(model, tokenizer):
|
| 1018 |
+
"""๋ชจ๋ธ ํ์ง ํ๊ฐ"""
|
| 1019 |
+
test_prompts = [
|
| 1020 |
+
"The capital of France is",
|
| 1021 |
+
"In machine learning,",
|
| 1022 |
+
"2 + 2 =",
|
| 1023 |
+
]
|
|
|
|
| 1024 |
|
| 1025 |
model.eval()
|
| 1026 |
scores = []
|
|
|
|
| 1040 |
score = 0.0
|
| 1041 |
if len(generated) > len(prompt):
|
| 1042 |
score += 0.3
|
| 1043 |
+
if not any(c in generated[len(prompt):] for c in ['๏ฟฝ', '[UNK]']):
|
| 1044 |
score += 0.3
|
| 1045 |
if len(generated.split()) > len(prompt.split()) + 2:
|
| 1046 |
score += 0.4
|
| 1047 |
|
| 1048 |
scores.append(score)
|
| 1049 |
+
except:
|
|
|
|
| 1050 |
scores.append(0.0)
|
| 1051 |
|
| 1052 |
return sum(scores) / len(scores) if scores else 0.0
|
| 1053 |
|
| 1054 |
|
| 1055 |
# =====================================================
|
| 1056 |
+
# ๋ฒ๋ ํจ์ (v2.0 ํตํฉ)
|
| 1057 |
# =====================================================
|
| 1058 |
|
| 1059 |
+
def burn_model_with_finetuning(
|
| 1060 |
model_url: str,
|
| 1061 |
output_dir: str,
|
| 1062 |
use_hierarchical: bool = True,
|
| 1063 |
+
enable_finetuning: bool = False,
|
| 1064 |
+
num_steps: int = 3000,
|
| 1065 |
+
batch_size: int = 4,
|
| 1066 |
+
learning_rate: float = 1e-5,
|
| 1067 |
):
|
| 1068 |
+
"""๐ v2.0: Zero-shot + Optional Fine-tuning"""
|
| 1069 |
print("="*80)
|
| 1070 |
+
print("๐ฅ PHOENIX Model Burning v2.0")
|
| 1071 |
print("="*80)
|
| 1072 |
|
| 1073 |
output_path = Path(output_dir)
|
| 1074 |
output_path.mkdir(parents=True, exist_ok=True)
|
| 1075 |
|
| 1076 |
try:
|
| 1077 |
+
# STEP 1: Structure Analysis
|
| 1078 |
+
print(f"\n๐ STEP 1: Structure Analysis...")
|
| 1079 |
structure_info = analyze_model_structure(model_url)
|
| 1080 |
|
| 1081 |
+
# STEP 2: Load Model
|
| 1082 |
+
print(f"\n๐ฅ STEP 2: Loading model...")
|
|
|
|
|
|
|
|
|
|
| 1083 |
start_time = time.time()
|
| 1084 |
|
| 1085 |
config = AutoConfig.from_pretrained(model_url, trust_remote_code=True)
|
|
|
|
| 1096 |
load_time = time.time() - start_time
|
| 1097 |
print(f"โ
Loaded in {load_time:.1f}s")
|
| 1098 |
|
| 1099 |
+
# STEP 3: Convert
|
| 1100 |
print(f"\n๐ STEP 3: Converting Attention โ Retention...")
|
| 1101 |
convert_start = time.time()
|
| 1102 |
|
|
|
|
| 1109 |
convert_time = time.time() - convert_start
|
| 1110 |
conversion_rate = converted / total if total > 0 else 0
|
| 1111 |
|
| 1112 |
+
print(f"โ
Converted {converted}/{total} layers in {convert_time:.1f}s")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1113 |
|
| 1114 |
+
# ๐ STEP 4: Fine-tuning (Optional)
|
| 1115 |
+
if enable_finetuning:
|
| 1116 |
+
print(f"\n๐ STEP 4: Fine-tuning (Brumby-style)...")
|
| 1117 |
+
ft_start = time.time()
|
| 1118 |
+
|
| 1119 |
+
model = finetune_retention_model(
|
| 1120 |
+
model=model,
|
| 1121 |
+
tokenizer=tokenizer,
|
| 1122 |
+
num_steps=num_steps,
|
| 1123 |
+
batch_size=batch_size,
|
| 1124 |
+
learning_rate=learning_rate,
|
| 1125 |
+
)
|
| 1126 |
+
|
| 1127 |
+
ft_time = time.time() - ft_start
|
| 1128 |
+
print(f"โ
Fine-tuning completed in {ft_time/60:.1f} minutes")
|
| 1129 |
+
else:
|
| 1130 |
+
ft_time = 0
|
| 1131 |
+
print(f"\nโญ๏ธ STEP 4: Fine-tuning skipped (enable for better quality)")
|
| 1132 |
|
| 1133 |
+
# STEP 5: Evaluate
|
| 1134 |
+
print(f"\n๐ STEP 5: Evaluating...")
|
| 1135 |
+
quality_score = evaluate_model_quality(model, tokenizer)
|
| 1136 |
+
print(f"โ
Quality: {quality_score:.2f}/1.00")
|
| 1137 |
|
| 1138 |
+
# STEP 6: Save
|
| 1139 |
+
print(f"\n๐พ STEP 6: Saving...")
|
| 1140 |
|
| 1141 |
metadata = {
|
| 1142 |
+
'phoenix_version': '2.0',
|
| 1143 |
'original_model': model_url,
|
| 1144 |
'use_hierarchical': use_hierarchical,
|
| 1145 |
'conversion_rate': conversion_rate,
|
|
|
|
|
|
|
| 1146 |
'quality_score': quality_score,
|
| 1147 |
+
'finetuned': enable_finetuning,
|
| 1148 |
+
'finetuning_steps': num_steps if enable_finetuning else 0,
|
| 1149 |
'timestamp': datetime.now().isoformat(),
|
| 1150 |
}
|
| 1151 |
|
| 1152 |
save_phoenix_model_with_code(model, tokenizer, output_path, model_url, metadata)
|
| 1153 |
|
|
|
|
|
|
|
|
|
|
| 1154 |
total_time = time.time() - start_time
|
| 1155 |
|
| 1156 |
result = {
|
|
|
|
| 1159 |
'conversion_rate': conversion_rate,
|
| 1160 |
'quality_score': quality_score,
|
| 1161 |
'total_time': total_time,
|
| 1162 |
+
'finetuned': enable_finetuning,
|
|
|
|
|
|
|
|
|
|
| 1163 |
'structure_info': structure_info,
|
| 1164 |
}
|
| 1165 |
|
| 1166 |
print(f"\n{'='*80}")
|
| 1167 |
+
print(f"โ
Burning Complete!")
|
| 1168 |
+
print(f" Model: {output_path}")
|
|
|
|
| 1169 |
print(f" Quality: {quality_score:.2f}/1.00")
|
| 1170 |
+
print(f" Fine-tuned: {enable_finetuning}")
|
| 1171 |
print(f"{'='*80}\n")
|
| 1172 |
|
| 1173 |
return result
|
| 1174 |
|
| 1175 |
except Exception as e:
|
| 1176 |
import traceback
|
|
|
|
|
|
|
| 1177 |
return {
|
| 1178 |
'status': 'failed',
|
| 1179 |
'error': str(e),
|
| 1180 |
+
'traceback': traceback.format_exc()
|
| 1181 |
}
|
| 1182 |
|
| 1183 |
|
| 1184 |
# =====================================================
|
| 1185 |
+
# Database
|
| 1186 |
# =====================================================
|
| 1187 |
|
| 1188 |
class ExperimentDatabase:
|
|
|
|
|
|
|
| 1189 |
def __init__(self, db_path: str):
|
| 1190 |
self.db_path = db_path
|
| 1191 |
self.init_database()
|
|
|
|
| 1192 |
|
| 1193 |
def init_database(self):
|
| 1194 |
with sqlite3.connect(self.db_path) as conn:
|
| 1195 |
cursor = conn.cursor()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1196 |
cursor.execute("""
|
| 1197 |
CREATE TABLE IF NOT EXISTS burning_history (
|
| 1198 |
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
| 1199 |
+
model_url TEXT,
|
| 1200 |
+
output_path TEXT,
|
| 1201 |
hub_url TEXT,
|
|
|
|
|
|
|
| 1202 |
conversion_rate REAL,
|
| 1203 |
+
quality_score REAL,
|
| 1204 |
+
finetuned BOOLEAN,
|
|
|
|
|
|
|
| 1205 |
timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
|
| 1206 |
)
|
| 1207 |
""")
|
| 1208 |
conn.commit()
|
| 1209 |
|
| 1210 |
+
def save_burning(self, info: Dict) -> int:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1211 |
with sqlite3.connect(self.db_path) as conn:
|
| 1212 |
cursor = conn.cursor()
|
| 1213 |
cursor.execute("""
|
| 1214 |
+
INSERT INTO burning_history
|
| 1215 |
+
(model_url, output_path, hub_url, conversion_rate, quality_score, finetuned)
|
| 1216 |
+
VALUES (?, ?, ?, ?, ?, ?)
|
|
|
|
|
|
|
| 1217 |
""", (
|
| 1218 |
+
info.get('model_url'),
|
| 1219 |
+
info.get('output_path'),
|
| 1220 |
+
info.get('hub_url'),
|
| 1221 |
+
info.get('conversion_rate'),
|
| 1222 |
+
info.get('quality_score'),
|
| 1223 |
+
info.get('finetuned'),
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1224 |
))
|
| 1225 |
conn.commit()
|
| 1226 |
return cursor.lastrowid
|
| 1227 |
|
| 1228 |
+
def get_history(self, limit: int = 20) -> List[Dict]:
|
| 1229 |
with sqlite3.connect(self.db_path) as conn:
|
| 1230 |
conn.row_factory = sqlite3.Row
|
| 1231 |
cursor = conn.cursor()
|
|
|
|
| 1233 |
return [dict(row) for row in cursor.fetchall()]
|
| 1234 |
|
| 1235 |
|
| 1236 |
+
db = ExperimentDatabase(DB_PATH)
|
| 1237 |
+
|
| 1238 |
+
|
| 1239 |
# =====================================================
|
| 1240 |
+
# Gradio UI
|
| 1241 |
# =====================================================
|
| 1242 |
|
| 1243 |
def burn_phoenix_model_ui(
    model_url,
    use_hierarchical,
    output_name,
    enable_finetuning,
    ft_steps,
    ft_batch,
    ft_lr,
    upload_hub,
    hub_repo,
    hub_private,
):
    """Gradio callback: burn (convert) a model, optionally fine-tune and upload.

    Args:
        model_url: HuggingFace model id/URL to convert.
        use_hierarchical: Enable hierarchical retention during conversion.
        output_name: Local output directory name; auto-generated when blank.
        enable_finetuning: Run the Brumby-style fine-tuning phase after burning.
        ft_steps / ft_batch / ft_lr: Fine-tuning steps, batch size, learning rate.
        upload_hub: Upload the result to the HuggingFace Hub (needs HF_TOKEN).
        hub_repo: Optional target repo name; derived automatically when blank.
        hub_private: Create the Hub repo as private.

    Returns:
        (markdown_report, plotly_figure) tuple; the figure is None on failure.
    """
    try:
        if not model_url.strip():
            return "โ ๏ธ Model URL required", None

        # Derive a unique output name from the model id + timestamp when empty.
        if not output_name.strip():
            output_name = f"phoenix_{model_url.split('/')[-1]}_{int(time.time())}"

        output_dir = f"{MODELS_PATH}/{output_name}"

        # v2.0: cost estimation (console-only, informational).
        if enable_finetuning:
            # NOTE(review): size detection is string-matching on the URL and
            # falls back to "1.5B" for anything that is not 0.6B — confirm
            # this covers all supported models.
            model_size = "0.6B" if "0.6B" in model_url else "1.5B"
            cost = estimate_finetuning_cost(model_size, ft_steps, ft_batch)
            print(f"\n๐ฐ Estimated Cost: ${cost['cost_usd']} ({cost['hours']}h)")

        # Burn (conversion + optional fine-tuning pipeline).
        result = burn_model_with_finetuning(
            model_url=model_url,
            output_dir=output_dir,
            use_hierarchical=use_hierarchical,
            enable_finetuning=enable_finetuning,
            num_steps=ft_steps,
            batch_size=ft_batch,
            learning_rate=ft_lr,
        )

        if result['status'] != 'success':
            return f"โ Failed\n```\n{result.get('error')}\n```", None

        # Upload to the Hub (skipped silently when disabled or no token).
        hub_url = None
        if upload_hub and HF_TOKEN:
            success, hub_url, msg = upload_to_huggingface_hub(
                model_path=result['model_path'],
                original_model_url=model_url,
                repo_name=hub_repo if hub_repo.strip() else None,
                private=hub_private,
            )

        # Record the run in the local experiment database.
        db.save_burning({
            'model_url': model_url,
            'output_path': result['model_path'],
            'hub_url': hub_url,
            'conversion_rate': result['conversion_rate'],
            'quality_score': result['quality_score'],
            'finetuned': enable_finetuning,
        })

        # Build the markdown report shown in the UI.
        output_md = f"""
# ๐ฅ PHOENIX v2.0 Burning Complete!

## Model Info
- **Original**: {model_url}
- **Output**: `{result['model_path']}`
- **Conversion**: {result['conversion_rate']*100:.1f}%
- **Quality**: {result['quality_score']:.2f}/1.00
- **Fine-tuned**: {'โ
 YES' if enable_finetuning else 'โ NO'}

## Hub Status
"""

        if hub_url:
            output_md += f"""
โ
 **Uploaded**: [{hub_url}]({hub_url})

```python
model = AutoModelForCausalLM.from_pretrained(
    "{hub_url.replace('https://huggingface.co/', '')}",
    trust_remote_code=True
)
```
"""
        else:
            output_md += "โญ๏ธ **Upload Skipped**"

        # Simple two-bar metrics plot (both metrics live in [0, 1]).
        fig = go.Figure()
        fig.add_trace(go.Bar(
            x=['Conversion', 'Quality'],
            y=[result['conversion_rate'], result['quality_score']],
            marker_color=['#3b82f6', '#10b981']
        ))
        fig.update_layout(title="Metrics", yaxis_range=[0, 1])

        return output_md, fig

    except Exception as e:
        # Surface the full traceback in the UI rather than a bare message.
        import traceback
        return f"โ Error:\n```\n{traceback.format_exc()}\n```", None
def view_history():
    """Render the saved burning history as a markdown table plus a scatter plot.

    Returns:
        (markdown, figure) tuple; figure is None when there is no history
        or an error occurred.
    """
    try:
        records = db.get_history(20)
        if not records:
            return "๐ญ No history", None

        frame = pd.DataFrame(records)

        chart = px.scatter(
            frame,
            x='timestamp',
            y='quality_score',
            color='finetuned',
            title='Burning History',
        )

        table_md = frame.to_markdown(index=False)
        return f"## History\n\n{table_md}", chart

    except Exception as e:
        return f"โ Error: {e}", None
| 1372 |
# =====================================================
|
| 1373 |
+
# Gradio App
|
| 1374 |
# =====================================================
|
| 1375 |
|
| 1376 |
+
with gr.Blocks(title="๐ฅ PHOENIX v2.0", theme=gr.themes.Soft()) as demo:
|
|
|
|
|
|
|
|
|
|
| 1377 |
|
| 1378 |
gr.Markdown("""
|
| 1379 |
+
# ๐ฅ PHOENIX v2.0 - Brumby-inspired Retraining
|
| 1380 |
|
| 1381 |
+
**Complete Integrated Version**
|
| 1382 |
|
| 1383 |
+
๐ **v2.0 NEW**: Fine-tuning ํ์ดํ๋ผ์ธ (Brumby-style)
|
| 1384 |
+
โ
v1.4.3: forward() Transformers ํธํ
|
| 1385 |
+
โ
v1.4.3: dtype ์์ (bfloat16)
|
| 1386 |
+
โ
GQA Support | O(n) Complexity
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1387 |
|
| 1388 |
---
|
| 1389 |
""")
|
| 1390 |
|
| 1391 |
with gr.Tabs():
|
| 1392 |
with gr.Tab("๐ฅ Model Burning"):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1393 |
with gr.Row():
|
| 1394 |
with gr.Column(scale=1):
|
| 1395 |
+
burn_url = gr.Textbox(
|
| 1396 |
label="๐ Model URL",
|
| 1397 |
value=DEFAULT_MODEL,
|
| 1398 |
placeholder="Qwen/Qwen3-0.6B"
|
| 1399 |
)
|
| 1400 |
+
burn_hier = gr.Checkbox(value=True, label="Hierarchical Retention")
|
| 1401 |
+
burn_name = gr.Textbox(label="๐พ Output Name", placeholder="my_model")
|
| 1402 |
+
|
| 1403 |
+
gr.Markdown("---")
|
| 1404 |
+
gr.Markdown("### ๐ Fine-tuning (v2.0)")
|
| 1405 |
+
|
| 1406 |
+
burn_ft_enable = gr.Checkbox(
|
| 1407 |
+
value=False,
|
| 1408 |
+
label="๐ Enable Fine-tuning (Brumby-style)",
|
| 1409 |
+
info="Required for quality output!"
|
| 1410 |
+
)
|
| 1411 |
|
| 1412 |
+
burn_ft_steps = gr.Slider(
|
| 1413 |
+
1000, 10000, 3000,
|
| 1414 |
+
step=100,
|
| 1415 |
+
label="Steps (Brumby used 3000)",
|
| 1416 |
+
visible=False
|
| 1417 |
)
|
| 1418 |
|
| 1419 |
+
burn_ft_batch = gr.Slider(1, 16, 4, step=1, label="Batch Size", visible=False)
|
| 1420 |
+
burn_ft_lr = gr.Number(value=1e-5, label="Learning Rate", visible=False)
|
| 1421 |
|
| 1422 |
+
def toggle_ft(enabled):
|
| 1423 |
+
return [
|
| 1424 |
+
gr.update(visible=enabled),
|
| 1425 |
+
gr.update(visible=enabled),
|
| 1426 |
+
gr.update(visible=enabled),
|
| 1427 |
+
]
|
| 1428 |
|
| 1429 |
+
burn_ft_enable.change(
|
| 1430 |
+
toggle_ft,
|
| 1431 |
+
[burn_ft_enable],
|
| 1432 |
+
[burn_ft_steps, burn_ft_batch, burn_ft_lr]
|
| 1433 |
+
)
|
| 1434 |
|
| 1435 |
+
gr.Markdown("---")
|
| 1436 |
+
gr.Markdown("### ๐ Hub Upload")
|
| 1437 |
|
| 1438 |
+
burn_upload = gr.Checkbox(value=True, label="๐ค Upload to Hub")
|
| 1439 |
+
burn_repo = gr.Textbox(label="๐ฆ Repo Name (optional)")
|
| 1440 |
+
burn_private = gr.Checkbox(value=True, label="๐ Private")
|
|
|
|
|
|
|
| 1441 |
|
| 1442 |
burn_btn = gr.Button("๐ฅ Burn Model", variant="primary", size="lg")
|
| 1443 |
|
|
|
|
| 1448 |
burn_btn.click(
|
| 1449 |
burn_phoenix_model_ui,
|
| 1450 |
[
|
| 1451 |
+
burn_url, burn_hier, burn_name,
|
| 1452 |
+
burn_ft_enable, burn_ft_steps, burn_ft_batch, burn_ft_lr,
|
| 1453 |
+
burn_upload, burn_repo, burn_private
|
| 1454 |
],
|
| 1455 |
[burn_output, burn_plot]
|
| 1456 |
)
|
| 1457 |
|
| 1458 |
+
with gr.Tab("๐ History"):
|
|
|
|
|
|
|
| 1459 |
with gr.Row():
|
| 1460 |
with gr.Column(scale=1):
|
| 1461 |
+
hist_btn = gr.Button("๐ Load", variant="primary")
|
|
|
|
| 1462 |
with gr.Column(scale=2):
|
| 1463 |
+
hist_out = gr.Markdown()
|
| 1464 |
hist_plot = gr.Plot()
|
| 1465 |
|
| 1466 |
+
hist_btn.click(view_history, outputs=[hist_out, hist_plot])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1467 |
|
| 1468 |
gr.Markdown(f"""
|
| 1469 |
---
|
| 1470 |
|
| 1471 |
+
## ๐ฅ PHOENIX v2.0
|
| 1472 |
|
| 1473 |
+
**What's New**:
|
| 1474 |
+
- ๐ Brumby-style Fine-tuning Pipeline
|
| 1475 |
+
- ๐ 3-Phase Dataset Support
|
| 1476 |
+
- ๐ Cost Calculator
|
| 1477 |
+
- โ
All v1.4.3 Fixes Included
|
|
|
|
| 1478 |
|
| 1479 |
+
**Token**: {'โ
' if HF_TOKEN else 'โ Not Found'}
|
| 1480 |
+
**VIDraft AI Research Lab** | PHOENIX v2.0 Complete
|
|
|
|
|
|
|
| 1481 |
""")
|
| 1482 |
|
| 1483 |
+
|
| 1484 |
if __name__ == "__main__":
|
| 1485 |
demo.queue(max_size=20)
|
| 1486 |
demo.launch(server_name="0.0.0.0", server_port=7860, share=False)
|