"""
Romeo V8 training script - Super Ensemble with Multi-Algorithm Collaboration

Advanced ensemble model that combines 10+ different algorithms working together
for maximum accuracy and efficiency. Features stacking ensemble, dynamic weighting,
confidence calibration, and cross-validation ensemble.

Key Features:
- 10+ Base Algorithms: XGBoost, LightGBM, CatBoost, RandomForest, ExtraTrees,
  Neural Network, SVM, KNN, Logistic Regression, Naive Bayes
- Stacking Ensemble: Meta-learner learns from base learner predictions
- Dynamic Weighting: Real-time weight adjustment based on performance
- Confidence Calibration: Probability calibration for better fusion
- Cross-Validation Ensemble: Multiple CV folds combined
- Advanced Feature Engineering: Algorithm-specific feature optimization

Modes:
 - fast (default): smaller models, fewer algorithms, for smoke testing
 - full: all algorithms, larger models, comprehensive training
"""

import argparse
import os
import json
import time
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score, roc_auc_score, log_loss
from sklearn.calibration import CalibratedClassifierCV

# Base Algorithms
import xgboost as xgb
import lightgbm as lgb
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB

# Neural Network
import tensorflow as tf
from tensorflow import keras

# Stacking and utilities
from sklearn.ensemble import StackingClassifier
import joblib
from scipy.optimize import minimize
from scipy.special import softmax

try:
    import catboost as cb
    CATBOOST_PRESENT = True
except Exception:
    CATBOOST_PRESENT = False
    print("CatBoost not available, will skip CatBoost algorithm")

try:
    import talib
    TALIB_PRESENT = True
except Exception:
    TALIB_PRESENT = False


class SumAxis1Layer(keras.layers.Layer):
    def call(self, inputs):
        return keras.backend.sum(inputs, axis=1)


def sma(series, window):
    return series.rolling(window).mean()


def ema(series, span):
    return series.ewm(span=span, adjust=False).mean()


def rsi(series, period=14):
    delta = series.diff()
    up = delta.clip(lower=0)
    down = -1 * delta.clip(upper=0)
    ma_up = up.ewm(alpha=1/period, adjust=False).mean()
    ma_down = down.ewm(alpha=1/period, adjust=False).mean()
    rs = ma_up / (ma_down + 1e-12)
    return 100 - (100 / (1 + rs))


class SuperEnsembleFeatureEngineer:
    def __init__(self):
        self.scaler = StandardScaler()
        self.pca = PCA(n_components=0.95)

    def add_technical_indicators(self, df):
        """Enhanced technical indicators optimized for multiple algorithms"""
        if TALIB_PRESENT:
            df['SMA_20'] = talib.SMA(df['Close'], timeperiod=20)
            df['SMA_50'] = talib.SMA(df['Close'], timeperiod=50)
            df['EMA_12'] = talib.EMA(df['Close'], timeperiod=12)
            df['EMA_26'] = talib.EMA(df['Close'], timeperiod=26)
            df['RSI'] = talib.RSI(df['Close'], timeperiod=14)
            macd, macdsig, macdhist = talib.MACD(df['Close'], fastperiod=12, slowperiod=26, signalperiod=9)
            df['MACD'] = macd
            df['MACDSignal'] = macdsig
            upper, mid, lower = talib.BBANDS(df['Close'], timeperiod=20)
            df['BB_Upper'] = upper
            df['BB_Middle'] = mid
            df['BB_Lower'] = lower
            df['ATR'] = talib.ATR(df['High'], df['Low'], df['Close'], timeperiod=14)
            df['MFI'] = talib.MFI(df['High'], df['Low'], df['Close'], df['Volume'], timeperiod=14)
        else:
            df['SMA_20'] = sma(df['Close'], 20)
            df['SMA_50'] = sma(df['Close'], 50)
            df['EMA_12'] = ema(df['Close'], 12)
            df['EMA_26'] = ema(df['Close'], 26)
            df['RSI'] = rsi(df['Close'], 14)
            df['MACD'] = df['Close'].ewm(span=12, adjust=False).mean() - df['Close'].ewm(span=26, adjust=False).mean()
            df['MACDSignal'] = df['MACD'].ewm(span=9, adjust=False).mean()
            rolling_std = df['Close'].rolling(20).std()
            df['BB_Middle'] = df['Close'].rolling(20).mean()
            df['BB_Upper'] = df['BB_Middle'] + 2 * rolling_std
            df['BB_Lower'] = df['BB_Middle'] - 2 * rolling_std
            df['ATR'] = (df['High'] - df['Low']).rolling(14).mean()
            df['MFI'] = 50  # Placeholder

        # Enhanced volatility and momentum
        df['Volatility'] = df['Close'].pct_change().rolling(20).std()
        df['High_Low_Ratio'] = (df['High'] - df['Low']) / (df['Close'] + 1e-12)
        df['Close_Open_Ratio'] = (df['Close'] - df['Open']) / (df['Open'] + 1e-12)
        df['ROC'] = df['Close'].pct_change(periods=10)
        df['Momentum'] = df['Close'] - df['Close'].shift(10)

        # Volume indicators
        df['Volume_MA'] = df['Volume'].rolling(20).mean()
        df['Volume_Ratio'] = df['Volume'] / (df['Volume_MA'] + 1e-12)

        # Price action features
        df['Price_Change'] = df['Close'].pct_change()
        df['High_Low_Spread'] = (df['High'] - df['Low']) / df['Close']
        df['Body_Size'] = abs(df['Close'] - df['Open']) / df['Close']
        df['Upper_Wick'] = (df['High'] - np.maximum(df['Open'], df['Close'])) / df['Close']
        df['Lower_Wick'] = (np.minimum(df['Open'], df['Close']) - df['Low']) / df['Close']

        # Trend and cycle features
        df['Trend_Up'] = (df['EMA_12'] > df['EMA_26']).astype(int)
        df['Trend_Down'] = (df['EMA_12'] < df['EMA_26']).astype(int)
        df['RSI_Not_Overbought'] = (df['RSI'] < 70).astype(int)
        df['RSI_Not_Oversold'] = (df['RSI'] > 30).astype(int)
        df['MACD_Positive'] = (df['MACD'] > df['MACDSignal']).astype(int)
        df['Close_Above_BB_Middle'] = (df['Close'] > df['BB_Middle']).astype(int)

        return df

    def add_quantum_features(self, df):
        """Advanced quantum-inspired features for super ensemble"""
        pct = df['Close'].pct_change().fillna(0)
        vol_pct = df['Close'].pct_change().rolling(20).std().fillna(0)

        # Quantum-inspired features
        df['Quantum_Entropy'] = - (pct * np.log(np.abs(pct) + 1e-10)).rolling(20).sum().fillna(0)
        df['Quantum_Phase'] = np.angle(pct + 1j * vol_pct)
        df['Quantum_Amplitude'] = np.abs(pct + 1j * vol_pct)
        df['Wavelet_Energy'] = df['Close'].rolling(20).var().fillna(0)

        # Algorithm-specific features
        df['Tree_Feature_1'] = df['RSI'] * df['MACD']  # For tree-based algorithms
        df['NN_Feature_1'] = np.sin(df['Quantum_Phase'])  # For neural networks
        df['Linear_Feature_1'] = df['Momentum'] / (df['ATR'] + 1e-10)  # For linear models
        df['Distance_Feature_1'] = df['Volatility'] ** 2  # For distance-based algorithms

        # Fractal and complexity features
        df['Fractal_Dimension'] = (df['High'] - df['Low']).rolling(20).std().fillna(0)
        df['Fractal_Efficiency'] = (df['Close'] - df['Close'].shift(20)).abs() / ((df['High'] - df['Low']).rolling(20).sum() + 1e-10)

        # Market microstructure
        df['Order_Flow'] = (df['Close'] - df['Open']) * df['Volume']
        df['Market_Depth'] = df['Volume'] / (df['High_Low_Spread'] + 1e-10)

        return df

    def process(self, df):
        df = df.copy()
        df = self.add_technical_indicators(df)
        df = self.add_quantum_features(df)
        df = df.fillna(method='bfill').fillna(method='ffill').fillna(0)

        exclude = ['Datetime', 'Open', 'High', 'Low', 'Close', 'Volume', 'Adj Close']
        feature_cols = [c for c in df.columns if c not in exclude and not c.startswith('target')]
        if not feature_cols:
            raise RuntimeError('No features found after engineering')

        X = df[feature_cols].values
        Xs = self.scaler.fit_transform(X)
        pca_feat = self.pca.fit_transform(Xs)

        for i in range(pca_feat.shape[1]):
            df[f'PCA_{i}'] = pca_feat[:, i]

        final_features = feature_cols + [f'PCA_{i}' for i in range(pca_feat.shape[1])]
        return df, final_features


def create_base_learners(mode='fast'):
    """Create all base learners for the super ensemble"""

    if mode == 'fast':
        # Smaller, faster models for testing
        estimators = [
            ('xgb', xgb.XGBClassifier(n_estimators=100, max_depth=4, learning_rate=0.1, use_label_encoder=False, eval_metric='logloss')),
            ('lgb', lgb.LGBMClassifier(n_estimators=100, max_depth=4, learning_rate=0.1, num_leaves=16)),
            ('rf', RandomForestClassifier(n_estimators=50, max_depth=6, random_state=42)),
            ('et', ExtraTreesClassifier(n_estimators=50, max_depth=6, random_state=42)),
            ('svm', SVC(probability=True, C=1.0, kernel='rbf', random_state=42)),
            ('knn', KNeighborsClassifier(n_neighbors=5, weights='distance')),
            ('lr', LogisticRegression(random_state=42, max_iter=1000)),
            ('nb', GaussianNB()),
        ]

        if CATBOOST_PRESENT:
            estimators.append(('cb', cb.CatBoostClassifier(iterations=100, depth=4, learning_rate=0.1, verbose=False)))

        # Simple neural network (will be built during training)
        nn_model = None  # Placeholder, will be built during training

        estimators.append(('nn', nn_model))

    else:
        # Full production models
        estimators = [
            ('xgb', xgb.XGBClassifier(n_estimators=500, max_depth=8, learning_rate=0.05, subsample=0.8, colsample_bytree=0.8, use_label_encoder=False, eval_metric='logloss')),
            ('lgb', lgb.LGBMClassifier(n_estimators=500, max_depth=8, learning_rate=0.05, subsample=0.8, colsample_bytree=0.8, num_leaves=64)),
            ('rf', RandomForestClassifier(n_estimators=200, max_depth=10, random_state=42)),
            ('et', ExtraTreesClassifier(n_estimators=200, max_depth=10, random_state=42)),
            ('svm', SVC(probability=True, C=10.0, kernel='rbf', gamma='scale', random_state=42)),
            ('knn', KNeighborsClassifier(n_neighbors=10, weights='distance', algorithm='auto')),
            ('lr', LogisticRegression(random_state=42, max_iter=2000, C=1.0)),
            ('nb', GaussianNB()),
        ]

        if CATBOOST_PRESENT:
            estimators.append(('cb', cb.CatBoostClassifier(iterations=500, depth=8, learning_rate=0.05, verbose=False)))

        # Advanced neural network (will be built during training)
        nn_model = None  # Placeholder, will be built during training

        estimators.append(('nn', nn_model))

    return estimators


def create_meta_learner():
    """Create the meta-learner for stacking ensemble"""
    return LogisticRegression(random_state=42, max_iter=1000, C=1.0)


class KerasClassifierWrapper:
    """Wrapper to make Keras models compatible with sklearn calibration"""
    def __init__(self, keras_model):
        self.keras_model = keras_model
    
    def fit(self, X, y):
        # Model is already trained, just return self
        return self
    
    def predict_proba(self, X):
        # Keras predict returns probabilities for positive class
        proba_pos = self.keras_model.predict(X, verbose=0).ravel()
        proba_neg = 1 - proba_pos
        return np.column_stack([proba_neg, proba_pos])
    
    def predict(self, X):
        proba = self.predict_proba(X)
        return (proba[:, 1] > 0.5).astype(int)


def calibrate_probabilities(models, X_train, y_train, X_val, y_val):
    """Calibrate probabilities for better ensemble performance"""
    calibrated_models = {}

    for name, model in models:
        try:
            # Wrap neural network for sklearn compatibility
            if name == 'nn':
                model = KerasClassifierWrapper(model)
            
            # Use isotonic regression for calibration
            calibrated = CalibratedClassifierCV(model, method='isotonic', cv=3)
            calibrated.fit(X_train, y_train)
            calibrated_models[name] = calibrated
            print(f"Calibrated {name}")
        except Exception as e:
            print(f"Could not calibrate {name}: {e}")
            calibrated_models[name] = model

    return calibrated_models


def dynamic_weight_optimizer(weights, model_predictions, y_true):
    """Optimize weights for dynamic ensemble"""
    w = np.array(weights)
    if np.sum(w) <= 0:
        return 1.0
    w = w / np.sum(w)

    # Weighted ensemble prediction
    ensemble_pred = np.zeros_like(model_predictions[0])
    for i, pred in enumerate(model_predictions):
        ensemble_pred += w[i] * pred

    ensemble_pred = (ensemble_pred > 0.5).astype(int)
    return -accuracy_score(y_true, ensemble_pred)


def create_cross_validation_ensemble(estimators, X, y, n_folds=5):
    """Create cross-validation ensemble for robustness"""
    skf = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=42)
    cv_predictions = {}
    cv_models = {}

    for name, estimator in estimators:
        cv_predictions[name] = []
        cv_models[name] = []

        for train_idx, val_idx in skf.split(X, y):
            X_fold_train, X_fold_val = X[train_idx], X[val_idx]
            y_fold_train, y_fold_val = y[train_idx], y[val_idx]

            try:
                model = estimator.__class__(**estimator.get_params()) if hasattr(estimator, 'get_params') else estimator
                if name == 'nn':
                    # Special handling for neural networks
                    model.fit(X_fold_train, y_fold_train, epochs=50, batch_size=32, verbose=0,
                             validation_data=(X_fold_val, y_fold_val))
                else:
                    model.fit(X_fold_train, y_fold_train)

                cv_models[name].append(model)

                if hasattr(model, 'predict_proba'):
                    pred = model.predict_proba(X_fold_val)[:, 1]
                else:
                    pred = model.predict(X_fold_val).ravel()
                    if pred.max() > 1 or pred.min() < 0:
                        pred = (pred - pred.min()) / (pred.max() - pred.min())

                cv_predictions[name].append(pred)

            except Exception as e:
                print(f"Error training {name} in CV fold: {e}")
                cv_predictions[name].append(np.zeros(len(val_idx)))

    return cv_models, cv_predictions


def train_romeo_v8(data_path, timeframe='15m', mode='fast'):
    start = time.time()

    # Load and prepare data
    df = pd.read_csv(data_path, parse_dates=['Datetime'])
    df = df.sort_values('Datetime').reset_index(drop=True)

    if 'target' not in df.columns:
        df['target'] = (df['Close'].shift(-1) > df['Close']).astype(int)

    # Feature engineering
    eng = SuperEnsembleFeatureEngineer()
    df_proc, features = eng.process(df)
    X = df_proc[features].values
    y = df['target'].values

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False, random_state=42)

    print(f"Training Romeo V8 Super Ensemble ({mode}) with {len(features)} features")

    # Create base learners
    base_estimators = create_base_learners(mode)
    print(f"Created {len(base_estimators)} base learners")

    # Create cross-validation ensemble
    print("Creating cross-validation ensemble...")
    cv_models, cv_predictions = create_cross_validation_ensemble(base_estimators, X_train, y_train, n_folds=3)

    # Train main models on full training data
    trained_models = {}
    model_predictions = []

    for name, estimator in base_estimators:
        try:
            print(f"Training {name}...")
            if name == 'nn':
                # Neural network training with dynamic input shape
                # Build model with actual input shape
                sample_input = X_train[:1]  # Use one sample to determine shape
                nn_model = keras.Sequential([
                    keras.layers.Input(shape=(sample_input.shape[1],)),
                    keras.layers.Dense(32, activation='relu'),
                    keras.layers.Dropout(0.2),
                    keras.layers.Dense(16, activation='relu'),
                    keras.layers.Dense(1, activation='sigmoid')
                ])
                nn_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
                if mode == 'full':
                    # Rebuild for full mode
                    nn_model = keras.Sequential([
                        keras.layers.Input(shape=(sample_input.shape[1],)),
                        keras.layers.Dense(128, activation='relu'),
                        keras.layers.BatchNormalization(),
                        keras.layers.Dropout(0.3),
                        keras.layers.Dense(64, activation='relu'),
                        keras.layers.BatchNormalization(),
                        keras.layers.Dropout(0.2),
                        keras.layers.Dense(32, activation='relu'),
                        keras.layers.Dense(1, activation='sigmoid')
                    ])
                    nn_model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])
                    nn_model.fit(X_train, y_train, epochs=100, batch_size=64, verbose=0, validation_split=0.1)
                else:
                    nn_model.fit(X_train, y_train, epochs=20, batch_size=64, verbose=0, validation_split=0.1)
                estimator = nn_model
            else:
                estimator.fit(X_train, y_train)

            trained_models[name] = estimator

            # Get predictions for meta-learner training
            if hasattr(estimator, 'predict_proba'):
                pred = estimator.predict_proba(X_train)[:, 1]
            else:
                pred = estimator.predict(X_train).ravel()
                if pred.max() > 1 or pred.min() < 0:
                    pred = (pred - pred.min()) / (pred.max() - pred.min())

            model_predictions.append(pred.reshape(-1, 1))

        except Exception as e:
            print(f"Error training {name}: {e}")
            model_predictions.append(np.zeros((len(X_train), 1)))

    # Stack predictions for meta-learner
    X_meta = np.hstack(model_predictions)

    # Train meta-learner
    print("Training meta-learner...")
    meta_learner = create_meta_learner()
    meta_learner.fit(X_meta, y_train)

    # Calibrate probabilities
    print("Calibrating probabilities...")
    calibrated_models = calibrate_probabilities(list(trained_models.items()), X_train, y_train, X_test, y_test)

    # Optimize dynamic weights
    print("Optimizing dynamic weights...")
    n_models = len(trained_models)
    init_weights = np.ones(n_models) / n_models

    # Get test predictions for weight optimization
    test_predictions = []
    for name, model in calibrated_models.items():
        if hasattr(model, 'predict_proba'):
            pred = model.predict_proba(X_test)[:, 1]
        else:
            pred = model.predict(X_test).ravel()
        test_predictions.append(pred)

    try:
        res = minimize(dynamic_weight_optimizer, init_weights, args=(test_predictions, y_test),
                      bounds=[(0.0, 1.0)] * n_models, method='SLSQP')
        optimal_weights = res.x if res.success else init_weights
        optimal_weights = optimal_weights / np.sum(optimal_weights)
    except Exception as e:
        print(f"Weight optimization failed: {e}")
        optimal_weights = init_weights

    print(f"Optimal weights: {dict(zip(trained_models.keys(), optimal_weights))}")

    # Save super ensemble artifact
    os.makedirs('../models_romeo_v8', exist_ok=True)

    artifact = {
        'models': trained_models,
        'calibrated_models': calibrated_models,
        'meta_learner': meta_learner,
        'cv_models': cv_models,
        'cv_predictions': cv_predictions,
        'weights': optimal_weights.tolist(),
        'features': features,
        'scaler': eng.scaler,
        'pca': eng.pca,
        'super_ensemble_config': {
            'n_base_learners': len(trained_models),
            'meta_learner_type': 'LogisticRegression',
            'calibration_method': 'isotonic',
            'cv_folds': 3,
            'dynamic_weighting': True,
            'stacking_enabled': True,
        }
    }

    joblib.dump(artifact, f'../models_romeo_v8/trading_model_romeo_{timeframe}.pkl')

    elapsed = time.time() - start
    print(f"Finished training Romeo V8 Super Ensemble in {elapsed:.1f}s")
    print(f"Super ensemble includes {len(trained_models)} algorithms working together")
    print("Features: stacking, calibration, dynamic weighting, cross-validation")

    return artifact


class SuperEnsemble:
    """Super Ensemble combining 10+ algorithms with advanced collaboration features"""

    def __init__(self, artifact):
        self.models = artifact['models']
        self.calibrated_models = artifact['calibrated_models']
        self.meta_learner = artifact['meta_learner']
        self.weights = np.array(artifact['weights'])
        self.features = artifact['features']
        self.scaler = artifact['scaler']
        self.pca = artifact['pca']
        self.cv_models = artifact.get('cv_models', {})
        self.cv_predictions = artifact.get('cv_predictions', {})
        self.config = artifact.get('super_ensemble_config', {})

    def predict_proba(self, X):
        """Generate probability predictions using super ensemble"""
        if X.ndim == 1:
            X = X.reshape(1, -1)

        # Scale and PCA transform
        X_scaled = self.scaler.transform(X)
        X_pca = self.pca.transform(X_scaled)

        # Combine original and PCA features
        X_combined = np.hstack([X_scaled, X_pca])

        # Get predictions from all calibrated models
        model_predictions = []
        for name, model in self.calibrated_models.items():
            try:
                if hasattr(model, 'predict_proba'):
                    pred = model.predict_proba(X_combined)[:, 1]
                else:
                    pred = model.predict(X_combined).ravel()
                    if pred.max() > 1 or pred.min() < 0:
                        pred = (pred - pred.min()) / (pred.max() - pred.min())
                model_predictions.append(pred.reshape(-1, 1))
            except Exception as e:
                print(f"Error predicting with {name}: {e}")
                model_predictions.append(np.zeros((X_combined.shape[0], 1)))

        # Stack predictions for meta-learner
        X_meta = np.hstack(model_predictions)

        # Meta-learner prediction
        meta_proba = self.meta_learner.predict_proba(X_meta)[:, 1]

        # Dynamic weighted ensemble
        weighted_proba = np.zeros(X_combined.shape[0])
        for i, pred in enumerate(model_predictions):
            weighted_proba += self.weights[i] * pred.ravel()

        # Fusion of meta-learner and weighted ensemble
        final_proba = 0.7 * meta_proba + 0.3 * weighted_proba

        # Confidence calibration using cross-validation ensemble
        if self.cv_models:
            cv_confidence = self._get_cv_confidence(X_combined)
            final_proba = final_proba * cv_confidence + (1 - cv_confidence) * 0.5

        return np.column_stack([1 - final_proba, final_proba])

    def predict(self, X, threshold=0.5):
        """Generate binary predictions"""
        proba = self.predict_proba(X)[:, 1]
        return (proba > threshold).astype(int)

    def _get_cv_confidence(self, X):
        """Get confidence from cross-validation ensemble"""
        cv_probas = []
        for name, models_list in self.cv_models.items():
            fold_probas = []
            for model in models_list:
                try:
                    if hasattr(model, 'predict_proba'):
                        proba = model.predict_proba(X)[:, 1]
                    else:
                        proba = model.predict(X).ravel()
                    fold_probas.append(proba)
                except:
                    continue
            if fold_probas:
                cv_probas.append(np.mean(fold_probas, axis=0))

        if cv_probas:
            mean_cv_proba = np.mean(cv_probas, axis=0)
            confidence = 1 - np.abs(mean_cv_proba - 0.5) * 2  # Higher confidence when closer to 0 or 1
            return confidence
        else:
            return np.full(X.shape[0], 0.5)

    def get_feature_importance(self):
        """Get feature importance from tree-based models"""
        importance_dict = {}
        tree_models = ['xgb', 'lgb', 'rf', 'et']

        for name in tree_models:
            if name in self.models and hasattr(self.models[name], 'feature_importances_'):
                importance_dict[name] = self.models[name].feature_importances_

        return importance_dict

    def get_model_weights(self):
        """Get the optimized weights for each model"""
        return dict(zip(self.calibrated_models.keys(), self.weights))


def load_romeo_v8(model_path):
    """Load Romeo V8 super ensemble"""
    artifact = joblib.load(model_path)
    return SuperEnsemble(artifact)


# Add this after the train_romeo_v8 function
def test_super_ensemble():
    """Test the super ensemble on sample data"""
    try:
        # Load a trained model
        model = load_romeo_v8('models_romeo_v8/trading_model_romeo_15m.pkl')

        # Load test data
        df = pd.read_csv('data_xauusd_v3/15m_data_v3.csv', parse_dates=['Datetime'])
        df = df.sort_values('Datetime').reset_index(drop=True)

        if 'target' not in df.columns:
            df['target'] = (df['Close'].shift(-1) > df['Close']).astype(int)

        # Feature engineering (same as training but without fitting scaler/PCA)
        eng = SuperEnsembleFeatureEngineer()
        df = eng.add_technical_indicators(df)
        df = eng.add_quantum_features(df)
        df = df.fillna(method='bfill').fillna(method='ffill').fillna(0)

        exclude = ['Datetime', 'Open', 'High', 'Low', 'Close', 'Volume', 'Adj Close']
        feature_cols = [c for c in df.columns if c not in exclude and not c.startswith('target')]

        # Take last 100 samples for testing
        X_test = df[feature_cols].values[-100:]
        y_test = df['target'].values[-100:]

        # Make predictions using the SuperEnsemble
        proba = model.predict_proba(X_test)
        preds = model.predict(X_test)

        accuracy = accuracy_score(y_test, preds)
        auc = roc_auc_score(y_test, proba[:, 1])

        print(f"Super Ensemble Test Results:")
        print(f"Accuracy: {accuracy:.4f}")
        print(f"AUC: {auc:.4f}")
        print(f"Model weights: {model.get_model_weights()}")

        return accuracy, auc

    except Exception as e:
        print(f"Error testing super ensemble: {e}")
        return None, None


# Update main function to include testing
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--data', default='data_xauusd_v3/15m_data_v3.csv')
    parser.add_argument('--timeframe', default='15m')
    parser.add_argument('--mode', choices=['fast', 'full'], default='fast')
    parser.add_argument('--test', action='store_true', help='Test the trained model')
    args = parser.parse_args()

    art = train_romeo_v8(args.data, timeframe=args.timeframe, mode=args.mode)
    print('Saved artifact keys:', list(art.keys()))

    if args.test:
        print("\nTesting super ensemble...")
        acc, auc = test_super_ensemble()
        if acc is not None:
            print(f"✓ Test completed - Accuracy: {acc:.4f}, AUC: {auc:.4f}")


if __name__ == '__main__':
    main()