CraigRoberts15's picture
Initial commit: Business Intelligence Dashboard with Git LFS
c51e926
raw
history blame
18.2 kB
"""
Unit Tests for Insights Module
Comprehensive tests for all insight strategies and the insight manager.
Author: Craig
Date: December 2024
"""
import pytest
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from insights import (
InsightStrategy, TopBottomPerformers, TrendAnalysis,
AnomalyDetection, DistributionInsights, CorrelationInsights,
InsightManager
)
# ============================================================================
# FIXTURES
# ============================================================================
@pytest.fixture
def sales_data():
    """Build a reproducible 100-row sales DataFrame.

    Columns:
        product: the labels 'A'..'E' repeated 20 times.
        sales: integers drawn from [100, 1000).
        revenue: floats drawn uniformly from [1000, 5000).
        region: labels drawn from 'North', 'South', 'East', 'West'.

    Returns:
        pd.DataFrame: the generated frame.
    """
    # A locally seeded generator keeps the data identical on every run and
    # leaves NumPy's global random state untouched.
    rng = np.random.default_rng(0)
    return pd.DataFrame({
        'product': ['A', 'B', 'C', 'D', 'E'] * 20,
        'sales': rng.integers(100, 1000, 100),
        'revenue': rng.uniform(1000, 5000, 100),
        'region': rng.choice(['North', 'South', 'East', 'West'], 100),
    })
@pytest.fixture
def time_series_data():
    """Build a reproducible 100-day time series DataFrame.

    Columns:
        date: daily timestamps starting at 2024-01-01.
        value: cumulative sum of standard-normal steps, shifted by 100.
        sales: integers drawn from [50, 200).

    Returns:
        pd.DataFrame: the generated frame.
    """
    # A locally seeded generator keeps the data identical on every run and
    # leaves NumPy's global random state untouched.
    rng = np.random.default_rng(1)
    dates = pd.date_range('2024-01-01', periods=100, freq='D')
    # Random walk offset by 100; no deterministic trend term is added.
    values = np.cumsum(rng.standard_normal(100)) + 100
    return pd.DataFrame({
        'date': dates,
        'value': values,
        'sales': rng.integers(50, 200, 100),
    })
@pytest.fixture
def anomaly_data():
    """Build a reproducible 100-row DataFrame containing planted outliers.

    Columns:
        values: 95 draws from normal(mean=100, std=10) plus the five fixed
            values 200, 10, 250, 5 and 220, shuffled together.
        category: labels drawn from 'A', 'B', 'C'.

    Returns:
        pd.DataFrame: the generated frame.
    """
    # A locally seeded generator keeps the data identical on every run and
    # leaves NumPy's global random state untouched.
    rng = np.random.default_rng(2)
    # Normal data with a few hand-picked outliers mixed in.
    normal = rng.normal(100, 10, 95)
    outliers = np.array([200, 10, 250, 5, 220])
    data = np.concatenate([normal, outliers])
    rng.shuffle(data)  # in place, so the outliers are not all at the end
    return pd.DataFrame({
        'values': data,
        'category': rng.choice(['A', 'B', 'C'], 100),
    })
@pytest.fixture
def correlation_data():
    """Build a reproducible 100-row DataFrame with known relationships.

    Columns:
        var_x: draws from normal(mean=50, std=10).
        var_y: 2 * var_x plus normal(0, 5) noise (positive relationship).
        var_z: -1.5 * var_x plus normal(0, 8) noise (negative relationship).
        var_w: independent draws from normal(mean=100, std=15).

    Returns:
        pd.DataFrame: the generated frame.
    """
    # Use a local generator instead of np.random.seed(42): reseeding the
    # global state here would make every later np.random call in the test
    # session depend on whether and when this fixture ran.
    rng = np.random.default_rng(42)
    x = rng.normal(50, 10, 100)
    y = 2 * x + rng.normal(0, 5, 100)  # strong positive relationship with x
    z = -1.5 * x + rng.normal(0, 8, 100)  # strong negative relationship with x
    w = rng.normal(100, 15, 100)  # generated independently of x
    return pd.DataFrame({
        'var_x': x,
        'var_y': y,
        'var_z': z,
        'var_w': w,
    })
@pytest.fixture
def mixed_data():
    """Build a reproducible 100-row DataFrame with mixed column types.

    Columns:
        numerical: draws from normal(mean=100, std=15).
        categorical: labels drawn from 'Cat1', 'Cat2', 'Cat3'.
        date: 100 consecutive timestamps starting at 2024-01-01.
        sales: integers drawn from [50, 500).

    Returns:
        pd.DataFrame: the generated frame.
    """
    # A locally seeded generator keeps the data identical on every run and
    # leaves NumPy's global random state untouched.
    rng = np.random.default_rng(3)
    return pd.DataFrame({
        'numerical': rng.normal(100, 15, 100),
        'categorical': rng.choice(['Cat1', 'Cat2', 'Cat3'], 100),
        'date': pd.date_range('2024-01-01', periods=100),
        'sales': rng.integers(50, 500, 100),
    })
# ============================================================================
# TOP/BOTTOM PERFORMERS TESTS
# ============================================================================
class TestTopBottomPerformers:
    """Checks for the TopBottomPerformers insight strategy."""

    def test_initialization(self):
        """A default-constructed strategy is a real object."""
        strategy = TopBottomPerformers()
        assert strategy is not None

    def test_get_insight_type(self):
        """The strategy reports "top_bottom_performers" as its type."""
        reported = TopBottomPerformers().get_insight_type()
        assert reported == "top_bottom_performers"

    def test_generate_simple(self, sales_data):
        """A plain call on one column returns the expected top-level keys."""
        strategy = TopBottomPerformers()
        report = strategy.generate(sales_data, column='sales')

        assert report['type'] == 'top_bottom_performers'
        for key in ('top_performers', 'bottom_performers', 'summary'):
            assert key in report

    def test_generate_with_groupby(self, sales_data):
        """Grouping options are echoed back and top performers are non-empty."""
        options = dict(column='sales', group_by='product', aggregation='sum')
        strategy = TopBottomPerformers()
        report = strategy.generate(sales_data, **options)

        assert report['group_by'] == 'product'
        assert report['aggregation'] == 'sum'
        top_rows = report['top_performers']['data']
        assert len(top_rows) > 0

    def test_generate_with_custom_n(self, sales_data):
        """Requested top_n / bottom_n act as upper bounds on the counts."""
        strategy = TopBottomPerformers()
        report = strategy.generate(
            sales_data, column='sales', top_n=3, bottom_n=3
        )

        for side in ('top_performers', 'bottom_performers'):
            assert report[side]['count'] <= 3

    def test_invalid_column(self, sales_data):
        """Asking for a column that is not in the frame raises ValueError."""
        strategy = TopBottomPerformers()
        missing = 'nonexistent'
        with pytest.raises(ValueError):
            strategy.generate(sales_data, column=missing)
# ============================================================================
# TREND ANALYSIS TESTS
# ============================================================================
class TestTrendAnalysis:
    """Checks for the TrendAnalysis insight strategy."""

    def test_initialization(self):
        """A default-constructed strategy is a real object."""
        analyzer = TrendAnalysis()
        assert analyzer is not None

    def test_get_insight_type(self):
        """The strategy reports "trend_analysis" as its type."""
        reported = TrendAnalysis().get_insight_type()
        assert reported == "trend_analysis"

    def test_generate_trend(self, time_series_data):
        """A trend run returns the expected top-level sections."""
        analyzer = TrendAnalysis()
        analysis = analyzer.generate(
            time_series_data, date_column='date', value_column='value'
        )

        assert analysis['type'] == 'trend_analysis'
        for section in ('trend_direction', 'metrics', 'date_range', 'summary'):
            assert section in analysis

    def test_trend_metrics(self, time_series_data):
        """The 'metrics' section exposes every expected metric name."""
        analyzer = TrendAnalysis()
        columns = {'date_column': 'date', 'value_column': 'value'}
        analysis = analyzer.generate(time_series_data, **columns)
        computed = analysis['metrics']

        expected_names = (
            'first_value',
            'last_value',
            'absolute_change',
            'percentage_change',
            'growth_rate',
            'volatility',
        )
        for name in expected_names:
            assert name in computed

    def test_insufficient_data(self):
        """A single-row frame yields a result carrying an 'error' key."""
        single_point = pd.DataFrame({
            'date': [pd.Timestamp('2024-01-01')],
            'value': [100],
        })
        analyzer = TrendAnalysis()
        outcome = analyzer.generate(
            single_point, date_column='date', value_column='value'
        )
        assert 'error' in outcome

    def test_invalid_columns(self, time_series_data):
        """An unknown date column raises ValueError."""
        analyzer = TrendAnalysis()
        bad_request = dict(date_column='nonexistent', value_column='value')
        with pytest.raises(ValueError):
            analyzer.generate(time_series_data, **bad_request)
# ============================================================================
# ANOMALY DETECTION TESTS
# ============================================================================
class TestAnomalyDetection:
    """Checks for the AnomalyDetection insight strategy."""

    def test_initialization(self):
        """A default-constructed strategy is a real object."""
        detector = AnomalyDetection()
        assert detector is not None

    def test_get_insight_type(self):
        """The strategy reports "anomaly_detection" as its type."""
        reported = AnomalyDetection().get_insight_type()
        assert reported == "anomaly_detection"

    def test_detect_zscore(self, anomaly_data):
        """A z-score run echoes the method and returns the expected keys."""
        detector = AnomalyDetection()
        settings = dict(column='values', method='zscore', threshold=2.5)
        findings = detector.generate(anomaly_data, **settings)

        assert findings['type'] == 'anomaly_detection'
        assert findings['method'] == 'zscore'
        for key in ('statistics', 'anomalies'):
            assert key in findings

    def test_detect_iqr(self, anomaly_data):
        """An IQR run echoes the method and reports a non-negative count."""
        detector = AnomalyDetection()
        settings = dict(column='values', method='iqr', threshold=1.5)
        findings = detector.generate(anomaly_data, **settings)

        assert findings['method'] == 'iqr'
        flagged = findings['statistics']['anomaly_count']
        assert flagged >= 0

    def test_no_anomalies(self):
        """Tightly clustered data with a threshold of 10 flags nothing."""
        # Very tight distribution around 100.
        tight = pd.DataFrame({'values': np.random.normal(100, 1, 100)})
        detector = AnomalyDetection()
        findings = detector.generate(tight, column='values', threshold=10)

        flagged = findings['statistics']['anomaly_count']
        assert flagged == 0

    def test_non_numerical_column(self, sales_data):
        """A text column yields a result carrying an 'error' key."""
        detector = AnomalyDetection()
        outcome = detector.generate(sales_data, column='product')
        assert 'error' in outcome

    def test_invalid_method(self, anomaly_data):
        """An unknown detection method raises ValueError."""
        detector = AnomalyDetection()
        bad_request = dict(column='values', method='invalid')
        with pytest.raises(ValueError):
            detector.generate(anomaly_data, **bad_request)
# ============================================================================
# DISTRIBUTION INSIGHTS TESTS
# ============================================================================
class TestDistributionInsights:
    """Checks for the DistributionInsights insight strategy."""

    def test_initialization(self):
        """A default-constructed strategy is a real object."""
        profiler = DistributionInsights()
        assert profiler is not None

    def test_get_insight_type(self):
        """The strategy reports "distribution_insights" as its type."""
        reported = DistributionInsights().get_insight_type()
        assert reported == "distribution_insights"

    def test_numerical_distribution(self, sales_data):
        """A numeric column is classified 'numerical' with the expected keys."""
        profiler = DistributionInsights()
        profile = profiler.generate(sales_data, column='sales')

        assert profile['type'] == 'distribution_insights'
        assert profile['data_type'] == 'numerical'
        for key in ('statistics', 'distribution_shape'):
            assert key in profile

    def test_numerical_statistics(self, sales_data):
        """The 'statistics' section exposes every expected statistic name."""
        profiler = DistributionInsights()
        profile = profiler.generate(sales_data, column='sales')
        computed = profile['statistics']

        for name in ('mean', 'median', 'std', 'skewness', 'kurtosis'):
            assert name in computed

    def test_categorical_distribution(self, sales_data):
        """A text column is classified 'categorical' with count details."""
        profiler = DistributionInsights()
        profile = profiler.generate(sales_data, column='product')

        assert profile['data_type'] == 'categorical'
        assert 'value_counts' in profile
        assert 'most_common' in profile['statistics']

    def test_empty_column(self):
        """An all-NaN column yields a result carrying an 'error' key."""
        all_missing = pd.DataFrame({'col': [np.nan, np.nan, np.nan]})
        profiler = DistributionInsights()
        outcome = profiler.generate(all_missing, column='col')
        assert 'error' in outcome
# ============================================================================
# CORRELATION INSIGHTS TESTS
# ============================================================================
class TestCorrelationInsights:
    """Test suite for CorrelationInsights class."""

    def test_initialization(self):
        """Test CorrelationInsights initialization."""
        insight = CorrelationInsights()
        assert insight is not None

    def test_get_insight_type(self):
        """Test getting insight type."""
        insight = CorrelationInsights()
        assert insight.get_insight_type() == "correlation_insights"

    def test_generate_correlations(self, correlation_data):
        """Test that a correlation run returns the expected top-level keys."""
        insight = CorrelationInsights()
        result = insight.generate(correlation_data, threshold=0.5)

        assert result['type'] == 'correlation_insights'
        assert 'strong_correlations_found' in result
        assert 'correlations' in result

    def test_strong_correlations_found(self, correlation_data):
        """Test that strong correlations are found."""
        insight = CorrelationInsights()
        result = insight.generate(correlation_data, threshold=0.7)

        # The fixture builds var_y and var_z as linear functions of var_x
        # plus noise, so at least one strong pair is expected.
        assert result['strong_correlations_found'] > 0

    def test_correlation_details(self, correlation_data):
        """Test that each reported correlation carries its detail fields."""
        insight = CorrelationInsights()
        result = insight.generate(correlation_data, threshold=0.5)
        correlations = result['correlations']

        # Fail loudly on an empty list: the previous `if len(...) > 0` guard
        # skipped every field check below and let the test pass vacuously.
        assert len(correlations) > 0, (
            "expected at least one correlation for the correlated fixture"
        )

        corr = correlations[0]
        expected_fields = (
            'variable1', 'variable2', 'correlation', 'strength', 'direction'
        )
        for field in expected_fields:
            assert field in corr

    def test_different_methods(self, correlation_data):
        """Test that the requested correlation method is echoed back."""
        insight = CorrelationInsights()

        # Pearson
        result1 = insight.generate(correlation_data, method='pearson')
        assert result1['method'] == 'pearson'

        # Spearman
        result2 = insight.generate(correlation_data, method='spearman')
        assert result2['method'] == 'spearman'

    def test_insufficient_columns(self):
        """Test that a single-column frame yields an 'error' result."""
        df = pd.DataFrame({'col': [1, 2, 3]})
        insight = CorrelationInsights()
        result = insight.generate(df)
        assert 'error' in result
# ============================================================================
# INSIGHT MANAGER TESTS
# ============================================================================
class TestInsightManager:
    """Checks for the InsightManager facade over the insight strategies."""

    def test_initialization(self):
        """A fresh manager exists and holds at least five strategies."""
        hub = InsightManager()
        assert hub is not None
        registered = len(hub.strategies)
        assert registered >= 5

    def test_get_available_insights(self):
        """The five expected insight names are all listed as available."""
        hub = InsightManager()
        names = hub.get_available_insights()

        expected = ('top_bottom', 'trend', 'anomaly', 'distribution', 'correlation')
        for name in expected:
            assert name in names

    def test_generate_top_bottom(self, sales_data):
        """'top_bottom' yields a 'top_bottom_performers' result."""
        hub = InsightManager()
        outcome = hub.generate_insight('top_bottom', sales_data, column='sales')
        assert outcome['type'] == 'top_bottom_performers'

    def test_generate_trend(self, time_series_data):
        """'trend' yields a 'trend_analysis' result."""
        hub = InsightManager()
        columns = dict(date_column='date', value_column='value')
        outcome = hub.generate_insight('trend', time_series_data, **columns)
        assert outcome['type'] == 'trend_analysis'

    def test_generate_anomaly(self, anomaly_data):
        """'anomaly' yields an 'anomaly_detection' result."""
        hub = InsightManager()
        outcome = hub.generate_insight('anomaly', anomaly_data, column='values')
        assert outcome['type'] == 'anomaly_detection'

    def test_generate_distribution(self, sales_data):
        """'distribution' yields a 'distribution_insights' result."""
        hub = InsightManager()
        outcome = hub.generate_insight('distribution', sales_data, column='sales')
        assert outcome['type'] == 'distribution_insights'

    def test_generate_correlation(self, correlation_data):
        """'correlation' yields a 'correlation_insights' result."""
        hub = InsightManager()
        outcome = hub.generate_insight('correlation', correlation_data)
        assert outcome['type'] == 'correlation_insights'

    def test_unsupported_insight_type(self, sales_data):
        """An unknown insight name raises ValueError with a matching message."""
        hub = InsightManager()
        expected_message = "Unsupported insight type"
        with pytest.raises(ValueError, match=expected_message):
            hub.generate_insight('invalid_type', sales_data)

    def test_generate_all_insights(self, mixed_data):
        """Generating everything returns a non-empty dict."""
        hub = InsightManager()
        everything = hub.generate_all_insights(mixed_data)

        assert isinstance(everything, dict)
        # At least some insights should come back for the mixed frame.
        assert len(everything) > 0

    def test_add_strategy(self):
        """Adding a strategy grows the registry by one and lists its name."""
        hub = InsightManager()
        count_before = len(hub.strategies)

        # Minimal stand-in strategy implementing the two methods used here.
        class StubStrategy(InsightStrategy):
            def generate(self, df, **kwargs):
                return {'type': 'mock'}

            def get_insight_type(self):
                return 'mock'

        hub.add_strategy('mock', StubStrategy())

        assert len(hub.strategies) == count_before + 1
        assert 'mock' in hub.get_available_insights()

    def test_format_insight_report(self, sales_data):
        """The formatted report is a string containing the expected headings."""
        hub = InsightManager()
        top_bottom = hub.generate_insight('top_bottom', sales_data, column='sales')
        rendered = hub.format_insight_report({'top_bottom': top_bottom})

        assert isinstance(rendered, str)
        for heading in ('INSIGHTS REPORT', 'TOP BOTTOM'):
            assert heading in rendered
# ============================================================================
# RUN TESTS
# ============================================================================
if __name__ == "__main__":
    # Run this file's tests verbosely with short tracebacks, and propagate
    # pytest's exit code so a direct run reports failures to the shell or CI
    # instead of always exiting with status 0.
    raise SystemExit(pytest.main([__file__, "-v", "--tb=short"]))