v8/train_v8.py · JonusNattapong/romeo-v8-super-ensemble-trading-ai at main

romeo-v8-super-ensemble-trading-ai / v8 /train_v8.py

Upload v8/train_v8.py with huggingface_hub

b45048d verified 3 months ago

28.5 kB

	"""
	Romeo V8 training script - Super Ensemble with Multi-Algorithm Collaboration

	Advanced ensemble model that combines 10+ different algorithms working together
	for maximum accuracy and efficiency. Features stacking ensemble, dynamic weighting,
	confidence calibration, and cross-validation ensemble.

	Key Features:
	- 10+ Base Algorithms: XGBoost, LightGBM, CatBoost, RandomForest, ExtraTrees,
	Neural Network, SVM, KNN, Logistic Regression, Naive Bayes
	- Stacking Ensemble: Meta-learner learns from base learner predictions
	- Dynamic Weighting: Real-time weight adjustment based on performance
	- Confidence Calibration: Probability calibration for better fusion
	- Cross-Validation Ensemble: Multiple CV folds combined
	- Advanced Feature Engineering: Algorithm-specific feature optimization

	Modes:
	- fast (default): smaller models, fewer algorithms, for smoke testing
	- full: all algorithms, larger models, comprehensive training
	"""

	import argparse
	import os
	import json
	import time
	import warnings
	warnings.filterwarnings('ignore')

	import numpy as np
	import pandas as pd
	from sklearn.model_selection import train_test_split, StratifiedKFold
	from sklearn.preprocessing import StandardScaler
	from sklearn.decomposition import PCA
	from sklearn.metrics import accuracy_score, roc_auc_score, log_loss
	from sklearn.calibration import CalibratedClassifierCV

	# Base Algorithms
	import xgboost as xgb
	import lightgbm as lgb
	from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
	from sklearn.svm import SVC
	from sklearn.neighbors import KNeighborsClassifier
	from sklearn.linear_model import LogisticRegression
	from sklearn.naive_bayes import GaussianNB

	# Neural Network
	import tensorflow as tf
	from tensorflow import keras

	# Stacking and utilities
	from sklearn.ensemble import StackingClassifier
	import joblib
	from scipy.optimize import minimize
	from scipy.special import softmax

	try:
	import catboost as cb
	CATBOOST_PRESENT = True
	except Exception:
	CATBOOST_PRESENT = False
	print("CatBoost not available, will skip CatBoost algorithm")

	try:
	import talib
	TALIB_PRESENT = True
	except Exception:
	TALIB_PRESENT = False


	class SumAxis1Layer(keras.layers.Layer):
	def call(self, inputs):
	return keras.backend.sum(inputs, axis=1)


	def sma(series, window):
	return series.rolling(window).mean()


	def ema(series, span):
	return series.ewm(span=span, adjust=False).mean()


	def rsi(series, period=14):
	delta = series.diff()
	up = delta.clip(lower=0)
	down = -1 * delta.clip(upper=0)
	ma_up = up.ewm(alpha=1/period, adjust=False).mean()
	ma_down = down.ewm(alpha=1/period, adjust=False).mean()
	rs = ma_up / (ma_down + 1e-12)
	return 100 - (100 / (1 + rs))


	class SuperEnsembleFeatureEngineer:
	def __init__(self):
	self.scaler = StandardScaler()
	self.pca = PCA(n_components=0.95)

	def add_technical_indicators(self, df):
	"""Enhanced technical indicators optimized for multiple algorithms"""
	if TALIB_PRESENT:
	df['SMA_20'] = talib.SMA(df['Close'], timeperiod=20)
	df['SMA_50'] = talib.SMA(df['Close'], timeperiod=50)
	df['EMA_12'] = talib.EMA(df['Close'], timeperiod=12)
	df['EMA_26'] = talib.EMA(df['Close'], timeperiod=26)
	df['RSI'] = talib.RSI(df['Close'], timeperiod=14)
	macd, macdsig, macdhist = talib.MACD(df['Close'], fastperiod=12, slowperiod=26, signalperiod=9)
	df['MACD'] = macd
	df['MACDSignal'] = macdsig
	upper, mid, lower = talib.BBANDS(df['Close'], timeperiod=20)
	df['BB_Upper'] = upper
	df['BB_Middle'] = mid
	df['BB_Lower'] = lower
	df['ATR'] = talib.ATR(df['High'], df['Low'], df['Close'], timeperiod=14)
	df['MFI'] = talib.MFI(df['High'], df['Low'], df['Close'], df['Volume'], timeperiod=14)
	else:
	df['SMA_20'] = sma(df['Close'], 20)
	df['SMA_50'] = sma(df['Close'], 50)
	df['EMA_12'] = ema(df['Close'], 12)
	df['EMA_26'] = ema(df['Close'], 26)
	df['RSI'] = rsi(df['Close'], 14)
	df['MACD'] = df['Close'].ewm(span=12, adjust=False).mean() - df['Close'].ewm(span=26, adjust=False).mean()
	df['MACDSignal'] = df['MACD'].ewm(span=9, adjust=False).mean()
	rolling_std = df['Close'].rolling(20).std()
	df['BB_Middle'] = df['Close'].rolling(20).mean()
	df['BB_Upper'] = df['BB_Middle'] + 2 * rolling_std
	df['BB_Lower'] = df['BB_Middle'] - 2 * rolling_std
	df['ATR'] = (df['High'] - df['Low']).rolling(14).mean()
	df['MFI'] = 50 # Placeholder

	# Enhanced volatility and momentum
	df['Volatility'] = df['Close'].pct_change().rolling(20).std()
	df['High_Low_Ratio'] = (df['High'] - df['Low']) / (df['Close'] + 1e-12)
	df['Close_Open_Ratio'] = (df['Close'] - df['Open']) / (df['Open'] + 1e-12)
	df['ROC'] = df['Close'].pct_change(periods=10)
	df['Momentum'] = df['Close'] - df['Close'].shift(10)

	# Volume indicators
	df['Volume_MA'] = df['Volume'].rolling(20).mean()
	df['Volume_Ratio'] = df['Volume'] / (df['Volume_MA'] + 1e-12)

	# Price action features
	df['Price_Change'] = df['Close'].pct_change()
	df['High_Low_Spread'] = (df['High'] - df['Low']) / df['Close']
	df['Body_Size'] = abs(df['Close'] - df['Open']) / df['Close']
	df['Upper_Wick'] = (df['High'] - np.maximum(df['Open'], df['Close'])) / df['Close']
	df['Lower_Wick'] = (np.minimum(df['Open'], df['Close']) - df['Low']) / df['Close']

	# Trend and cycle features
	df['Trend_Up'] = (df['EMA_12'] > df['EMA_26']).astype(int)
	df['Trend_Down'] = (df['EMA_12'] < df['EMA_26']).astype(int)
	df['RSI_Not_Overbought'] = (df['RSI'] < 70).astype(int)
	df['RSI_Not_Oversold'] = (df['RSI'] > 30).astype(int)
	df['MACD_Positive'] = (df['MACD'] > df['MACDSignal']).astype(int)
	df['Close_Above_BB_Middle'] = (df['Close'] > df['BB_Middle']).astype(int)

	return df

	def add_quantum_features(self, df):
	"""Advanced quantum-inspired features for super ensemble"""
	pct = df['Close'].pct_change().fillna(0)
	vol_pct = df['Close'].pct_change().rolling(20).std().fillna(0)

	# Quantum-inspired features
	df['Quantum_Entropy'] = - (pct * np.log(np.abs(pct) + 1e-10)).rolling(20).sum().fillna(0)
	df['Quantum_Phase'] = np.angle(pct + 1j * vol_pct)
	df['Quantum_Amplitude'] = np.abs(pct + 1j * vol_pct)
	df['Wavelet_Energy'] = df['Close'].rolling(20).var().fillna(0)

	# Algorithm-specific features
	df['Tree_Feature_1'] = df['RSI'] * df['MACD'] # For tree-based algorithms
	df['NN_Feature_1'] = np.sin(df['Quantum_Phase']) # For neural networks
	df['Linear_Feature_1'] = df['Momentum'] / (df['ATR'] + 1e-10) # For linear models
	df['Distance_Feature_1'] = df['Volatility'] ** 2 # For distance-based algorithms

	# Fractal and complexity features
	df['Fractal_Dimension'] = (df['High'] - df['Low']).rolling(20).std().fillna(0)
	df['Fractal_Efficiency'] = (df['Close'] - df['Close'].shift(20)).abs() / ((df['High'] - df['Low']).rolling(20).sum() + 1e-10)

	# Market microstructure
	df['Order_Flow'] = (df['Close'] - df['Open']) * df['Volume']
	df['Market_Depth'] = df['Volume'] / (df['High_Low_Spread'] + 1e-10)

	return df

	def process(self, df):
	df = df.copy()
	df = self.add_technical_indicators(df)
	df = self.add_quantum_features(df)
	df = df.fillna(method='bfill').fillna(method='ffill').fillna(0)

	exclude = ['Datetime', 'Open', 'High', 'Low', 'Close', 'Volume', 'Adj Close']
	feature_cols = [c for c in df.columns if c not in exclude and not c.startswith('target')]
	if not feature_cols:
	raise RuntimeError('No features found after engineering')

	X = df[feature_cols].values
	Xs = self.scaler.fit_transform(X)
	pca_feat = self.pca.fit_transform(Xs)

	for i in range(pca_feat.shape[1]):
	df[f'PCA_{i}'] = pca_feat[:, i]

	final_features = feature_cols + [f'PCA_{i}' for i in range(pca_feat.shape[1])]
	return df, final_features


	def create_base_learners(mode='fast'):
	"""Create all base learners for the super ensemble"""

	if mode == 'fast':
	# Smaller, faster models for testing
	estimators = [
	('xgb', xgb.XGBClassifier(n_estimators=100, max_depth=4, learning_rate=0.1, use_label_encoder=False, eval_metric='logloss')),
	('lgb', lgb.LGBMClassifier(n_estimators=100, max_depth=4, learning_rate=0.1, num_leaves=16)),
	('rf', RandomForestClassifier(n_estimators=50, max_depth=6, random_state=42)),
	('et', ExtraTreesClassifier(n_estimators=50, max_depth=6, random_state=42)),
	('svm', SVC(probability=True, C=1.0, kernel='rbf', random_state=42)),
	('knn', KNeighborsClassifier(n_neighbors=5, weights='distance')),
	('lr', LogisticRegression(random_state=42, max_iter=1000)),
	('nb', GaussianNB()),
	]

	if CATBOOST_PRESENT:
	estimators.append(('cb', cb.CatBoostClassifier(iterations=100, depth=4, learning_rate=0.1, verbose=False)))

	# Simple neural network (will be built during training)
	nn_model = None # Placeholder, will be built during training

	estimators.append(('nn', nn_model))

	else:
	# Full production models
	estimators = [
	('xgb', xgb.XGBClassifier(n_estimators=500, max_depth=8, learning_rate=0.05, subsample=0.8, colsample_bytree=0.8, use_label_encoder=False, eval_metric='logloss')),
	('lgb', lgb.LGBMClassifier(n_estimators=500, max_depth=8, learning_rate=0.05, subsample=0.8, colsample_bytree=0.8, num_leaves=64)),
	('rf', RandomForestClassifier(n_estimators=200, max_depth=10, random_state=42)),
	('et', ExtraTreesClassifier(n_estimators=200, max_depth=10, random_state=42)),
	('svm', SVC(probability=True, C=10.0, kernel='rbf', gamma='scale', random_state=42)),
	('knn', KNeighborsClassifier(n_neighbors=10, weights='distance', algorithm='auto')),
	('lr', LogisticRegression(random_state=42, max_iter=2000, C=1.0)),
	('nb', GaussianNB()),
	]

	if CATBOOST_PRESENT:
	estimators.append(('cb', cb.CatBoostClassifier(iterations=500, depth=8, learning_rate=0.05, verbose=False)))

	# Advanced neural network (will be built during training)
	nn_model = None # Placeholder, will be built during training

	estimators.append(('nn', nn_model))

	return estimators


	def create_meta_learner():
	"""Create the meta-learner for stacking ensemble"""
	return LogisticRegression(random_state=42, max_iter=1000, C=1.0)


	class KerasClassifierWrapper:
	"""Wrapper to make Keras models compatible with sklearn calibration"""
	def __init__(self, keras_model):
	self.keras_model = keras_model

	def fit(self, X, y):
	# Model is already trained, just return self
	return self

	def predict_proba(self, X):
	# Keras predict returns probabilities for positive class
	proba_pos = self.keras_model.predict(X, verbose=0).ravel()
	proba_neg = 1 - proba_pos
	return np.column_stack([proba_neg, proba_pos])

	def predict(self, X):
	proba = self.predict_proba(X)
	return (proba[:, 1] > 0.5).astype(int)


	def calibrate_probabilities(models, X_train, y_train, X_val, y_val):
	"""Calibrate probabilities for better ensemble performance"""
	calibrated_models = {}

	for name, model in models:
	try:
	# Wrap neural network for sklearn compatibility
	if name == 'nn':
	model = KerasClassifierWrapper(model)

	# Use isotonic regression for calibration
	calibrated = CalibratedClassifierCV(model, method='isotonic', cv=3)
	calibrated.fit(X_train, y_train)
	calibrated_models[name] = calibrated
	print(f"Calibrated {name}")
	except Exception as e:
	print(f"Could not calibrate {name}: {e}")
	calibrated_models[name] = model

	return calibrated_models


	def dynamic_weight_optimizer(weights, model_predictions, y_true):
	"""Optimize weights for dynamic ensemble"""
	w = np.array(weights)
	if np.sum(w) <= 0:
	return 1.0
	w = w / np.sum(w)

	# Weighted ensemble prediction
	ensemble_pred = np.zeros_like(model_predictions[0])
	for i, pred in enumerate(model_predictions):
	ensemble_pred += w[i] * pred

	ensemble_pred = (ensemble_pred > 0.5).astype(int)
	return -accuracy_score(y_true, ensemble_pred)


	def create_cross_validation_ensemble(estimators, X, y, n_folds=5):
	"""Create cross-validation ensemble for robustness"""
	skf = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=42)
	cv_predictions = {}
	cv_models = {}

	for name, estimator in estimators:
	cv_predictions[name] = []
	cv_models[name] = []

	for train_idx, val_idx in skf.split(X, y):
	X_fold_train, X_fold_val = X[train_idx], X[val_idx]
	y_fold_train, y_fold_val = y[train_idx], y[val_idx]

	try:
	model = estimator.__class__(**estimator.get_params()) if hasattr(estimator, 'get_params') else estimator
	if name == 'nn':
	# Special handling for neural networks
	model.fit(X_fold_train, y_fold_train, epochs=50, batch_size=32, verbose=0,
	validation_data=(X_fold_val, y_fold_val))
	else:
	model.fit(X_fold_train, y_fold_train)

	cv_models[name].append(model)

	if hasattr(model, 'predict_proba'):
	pred = model.predict_proba(X_fold_val)[:, 1]
	else:
	pred = model.predict(X_fold_val).ravel()
	if pred.max() > 1 or pred.min() < 0:
	pred = (pred - pred.min()) / (pred.max() - pred.min())

	cv_predictions[name].append(pred)

	except Exception as e:
	print(f"Error training {name} in CV fold: {e}")
	cv_predictions[name].append(np.zeros(len(val_idx)))

	return cv_models, cv_predictions


	def train_romeo_v8(data_path, timeframe='15m', mode='fast'):
	start = time.time()

	# Load and prepare data
	df = pd.read_csv(data_path, parse_dates=['Datetime'])
	df = df.sort_values('Datetime').reset_index(drop=True)

	if 'target' not in df.columns:
	df['target'] = (df['Close'].shift(-1) > df['Close']).astype(int)

	# Feature engineering
	eng = SuperEnsembleFeatureEngineer()
	df_proc, features = eng.process(df)
	X = df_proc[features].values
	y = df['target'].values

	X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False, random_state=42)

	print(f"Training Romeo V8 Super Ensemble ({mode}) with {len(features)} features")

	# Create base learners
	base_estimators = create_base_learners(mode)
	print(f"Created {len(base_estimators)} base learners")

	# Create cross-validation ensemble
	print("Creating cross-validation ensemble...")
	cv_models, cv_predictions = create_cross_validation_ensemble(base_estimators, X_train, y_train, n_folds=3)

	# Train main models on full training data
	trained_models = {}
	model_predictions = []

	for name, estimator in base_estimators:
	try:
	print(f"Training {name}...")
	if name == 'nn':
	# Neural network training with dynamic input shape
	# Build model with actual input shape
	sample_input = X_train[:1] # Use one sample to determine shape
	nn_model = keras.Sequential([
	keras.layers.Input(shape=(sample_input.shape[1],)),
	keras.layers.Dense(32, activation='relu'),
	keras.layers.Dropout(0.2),
	keras.layers.Dense(16, activation='relu'),
	keras.layers.Dense(1, activation='sigmoid')
	])
	nn_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
	if mode == 'full':
	# Rebuild for full mode
	nn_model = keras.Sequential([
	keras.layers.Input(shape=(sample_input.shape[1],)),
	keras.layers.Dense(128, activation='relu'),
	keras.layers.BatchNormalization(),
	keras.layers.Dropout(0.3),
	keras.layers.Dense(64, activation='relu'),
	keras.layers.BatchNormalization(),
	keras.layers.Dropout(0.2),
	keras.layers.Dense(32, activation='relu'),
	keras.layers.Dense(1, activation='sigmoid')
	])
	nn_model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])
	nn_model.fit(X_train, y_train, epochs=100, batch_size=64, verbose=0, validation_split=0.1)
	else:
	nn_model.fit(X_train, y_train, epochs=20, batch_size=64, verbose=0, validation_split=0.1)
	estimator = nn_model
	else:
	estimator.fit(X_train, y_train)

	trained_models[name] = estimator

	# Get predictions for meta-learner training
	if hasattr(estimator, 'predict_proba'):
	pred = estimator.predict_proba(X_train)[:, 1]
	else:
	pred = estimator.predict(X_train).ravel()
	if pred.max() > 1 or pred.min() < 0:
	pred = (pred - pred.min()) / (pred.max() - pred.min())

	model_predictions.append(pred.reshape(-1, 1))

	except Exception as e:
	print(f"Error training {name}: {e}")
	model_predictions.append(np.zeros((len(X_train), 1)))

	# Stack predictions for meta-learner
	X_meta = np.hstack(model_predictions)

	# Train meta-learner
	print("Training meta-learner...")
	meta_learner = create_meta_learner()
	meta_learner.fit(X_meta, y_train)

	# Calibrate probabilities
	print("Calibrating probabilities...")
	calibrated_models = calibrate_probabilities(list(trained_models.items()), X_train, y_train, X_test, y_test)

	# Optimize dynamic weights
	print("Optimizing dynamic weights...")
	n_models = len(trained_models)
	init_weights = np.ones(n_models) / n_models

	# Get test predictions for weight optimization
	test_predictions = []
	for name, model in calibrated_models.items():
	if hasattr(model, 'predict_proba'):
	pred = model.predict_proba(X_test)[:, 1]
	else:
	pred = model.predict(X_test).ravel()
	test_predictions.append(pred)

	try:
	res = minimize(dynamic_weight_optimizer, init_weights, args=(test_predictions, y_test),
	bounds=[(0.0, 1.0)] * n_models, method='SLSQP')
	optimal_weights = res.x if res.success else init_weights
	optimal_weights = optimal_weights / np.sum(optimal_weights)
	except Exception as e:
	print(f"Weight optimization failed: {e}")
	optimal_weights = init_weights

	print(f"Optimal weights: {dict(zip(trained_models.keys(), optimal_weights))}")

	# Save super ensemble artifact
	os.makedirs('../models_romeo_v8', exist_ok=True)

	artifact = {
	'models': trained_models,
	'calibrated_models': calibrated_models,
	'meta_learner': meta_learner,
	'cv_models': cv_models,
	'cv_predictions': cv_predictions,
	'weights': optimal_weights.tolist(),
	'features': features,
	'scaler': eng.scaler,
	'pca': eng.pca,
	'super_ensemble_config': {
	'n_base_learners': len(trained_models),
	'meta_learner_type': 'LogisticRegression',
	'calibration_method': 'isotonic',
	'cv_folds': 3,
	'dynamic_weighting': True,
	'stacking_enabled': True,
	}
	}

	joblib.dump(artifact, f'../models_romeo_v8/trading_model_romeo_{timeframe}.pkl')

	elapsed = time.time() - start
	print(f"Finished training Romeo V8 Super Ensemble in {elapsed:.1f}s")
	print(f"Super ensemble includes {len(trained_models)} algorithms working together")
	print("Features: stacking, calibration, dynamic weighting, cross-validation")

	return artifact


	class SuperEnsemble:
	"""Super Ensemble combining 10+ algorithms with advanced collaboration features"""

	def __init__(self, artifact):
	self.models = artifact['models']
	self.calibrated_models = artifact['calibrated_models']
	self.meta_learner = artifact['meta_learner']
	self.weights = np.array(artifact['weights'])
	self.features = artifact['features']
	self.scaler = artifact['scaler']
	self.pca = artifact['pca']
	self.cv_models = artifact.get('cv_models', {})
	self.cv_predictions = artifact.get('cv_predictions', {})
	self.config = artifact.get('super_ensemble_config', {})

	def predict_proba(self, X):
	"""Generate probability predictions using super ensemble"""
	if X.ndim == 1:
	X = X.reshape(1, -1)

	# Scale and PCA transform
	X_scaled = self.scaler.transform(X)
	X_pca = self.pca.transform(X_scaled)

	# Combine original and PCA features
	X_combined = np.hstack([X_scaled, X_pca])

	# Get predictions from all calibrated models
	model_predictions = []
	for name, model in self.calibrated_models.items():
	try:
	if hasattr(model, 'predict_proba'):
	pred = model.predict_proba(X_combined)[:, 1]
	else:
	pred = model.predict(X_combined).ravel()
	if pred.max() > 1 or pred.min() < 0:
	pred = (pred - pred.min()) / (pred.max() - pred.min())
	model_predictions.append(pred.reshape(-1, 1))
	except Exception as e:
	print(f"Error predicting with {name}: {e}")
	model_predictions.append(np.zeros((X_combined.shape[0], 1)))

	# Stack predictions for meta-learner
	X_meta = np.hstack(model_predictions)

	# Meta-learner prediction
	meta_proba = self.meta_learner.predict_proba(X_meta)[:, 1]

	# Dynamic weighted ensemble
	weighted_proba = np.zeros(X_combined.shape[0])
	for i, pred in enumerate(model_predictions):
	weighted_proba += self.weights[i] * pred.ravel()

	# Fusion of meta-learner and weighted ensemble
	final_proba = 0.7 * meta_proba + 0.3 * weighted_proba

	# Confidence calibration using cross-validation ensemble
	if self.cv_models:
	cv_confidence = self._get_cv_confidence(X_combined)
	final_proba = final_proba * cv_confidence + (1 - cv_confidence) * 0.5

	return np.column_stack([1 - final_proba, final_proba])

	def predict(self, X, threshold=0.5):
	"""Generate binary predictions"""
	proba = self.predict_proba(X)[:, 1]
	return (proba > threshold).astype(int)

	def _get_cv_confidence(self, X):
	"""Get confidence from cross-validation ensemble"""
	cv_probas = []
	for name, models_list in self.cv_models.items():
	fold_probas = []
	for model in models_list:
	try:
	if hasattr(model, 'predict_proba'):
	proba = model.predict_proba(X)[:, 1]
	else:
	proba = model.predict(X).ravel()
	fold_probas.append(proba)
	except:
	continue
	if fold_probas:
	cv_probas.append(np.mean(fold_probas, axis=0))

	if cv_probas:
	mean_cv_proba = np.mean(cv_probas, axis=0)
	confidence = 1 - np.abs(mean_cv_proba - 0.5) * 2 # Higher confidence when closer to 0 or 1
	return confidence
	else:
	return np.full(X.shape[0], 0.5)

	def get_feature_importance(self):
	"""Get feature importance from tree-based models"""
	importance_dict = {}
	tree_models = ['xgb', 'lgb', 'rf', 'et']

	for name in tree_models:
	if name in self.models and hasattr(self.models[name], 'feature_importances_'):
	importance_dict[name] = self.models[name].feature_importances_

	return importance_dict

	def get_model_weights(self):
	"""Get the optimized weights for each model"""
	return dict(zip(self.calibrated_models.keys(), self.weights))


	def load_romeo_v8(model_path):
	"""Load Romeo V8 super ensemble"""
	artifact = joblib.load(model_path)
	return SuperEnsemble(artifact)


	# Add this after the train_romeo_v8 function
	def test_super_ensemble():
	"""Test the super ensemble on sample data"""
	try:
	# Load a trained model
	model = load_romeo_v8('models_romeo_v8/trading_model_romeo_15m.pkl')

	# Load test data
	df = pd.read_csv('data_xauusd_v3/15m_data_v3.csv', parse_dates=['Datetime'])
	df = df.sort_values('Datetime').reset_index(drop=True)

	if 'target' not in df.columns:
	df['target'] = (df['Close'].shift(-1) > df['Close']).astype(int)

	# Feature engineering (same as training but without fitting scaler/PCA)
	eng = SuperEnsembleFeatureEngineer()
	df = eng.add_technical_indicators(df)
	df = eng.add_quantum_features(df)
	df = df.fillna(method='bfill').fillna(method='ffill').fillna(0)

	exclude = ['Datetime', 'Open', 'High', 'Low', 'Close', 'Volume', 'Adj Close']
	feature_cols = [c for c in df.columns if c not in exclude and not c.startswith('target')]

	# Take last 100 samples for testing
	X_test = df[feature_cols].values[-100:]
	y_test = df['target'].values[-100:]

	# Make predictions using the SuperEnsemble
	proba = model.predict_proba(X_test)
	preds = model.predict(X_test)

	accuracy = accuracy_score(y_test, preds)
	auc = roc_auc_score(y_test, proba[:, 1])

	print(f"Super Ensemble Test Results:")
	print(f"Accuracy: {accuracy:.4f}")
	print(f"AUC: {auc:.4f}")
	print(f"Model weights: {model.get_model_weights()}")

	return accuracy, auc

	except Exception as e:
	print(f"Error testing super ensemble: {e}")
	return None, None


	# Update main function to include testing
	def main():
	parser = argparse.ArgumentParser()
	parser.add_argument('--data', default='data_xauusd_v3/15m_data_v3.csv')
	parser.add_argument('--timeframe', default='15m')
	parser.add_argument('--mode', choices=['fast', 'full'], default='fast')
	parser.add_argument('--test', action='store_true', help='Test the trained model')
	args = parser.parse_args()

	art = train_romeo_v8(args.data, timeframe=args.timeframe, mode=args.mode)
	print('Saved artifact keys:', list(art.keys()))

	if args.test:
	print("\nTesting super ensemble...")
	acc, auc = test_super_ensemble()
	if acc is not None:
	print(f"✓ Test completed - Accuracy: {acc:.4f}, AUC: {auc:.4f}")


	if __name__ == '__main__':
	main()