|
|
""" |
|
|
Unit Tests for Insights Module

Comprehensive tests for all insight strategies and the insight manager.

Author: Craig
Date: December 2024
|
|
""" |
|
|
|
|
|
import pytest |
|
|
import pandas as pd |
|
|
import numpy as np |
|
|
from datetime import datetime, timedelta |
|
|
|
|
|
from insights import ( |
|
|
InsightStrategy, TopBottomPerformers, TrendAnalysis, |
|
|
AnomalyDetection, DistributionInsights, CorrelationInsights, |
|
|
InsightManager |
|
|
) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.fixture
def sales_data():
    """Create a reproducible 100-row sample of sales data.

    Columns:
        product: the labels 'A' through 'E' repeated 20 times.
        sales: random integers drawn from [100, 1000).
        revenue: random floats drawn uniformly from [1000, 5000).
        region: random picks from 'North', 'South', 'East', 'West'.

    Returns:
        pd.DataFrame: the generated sales table.
    """
    # A locally seeded generator keeps the data identical between runs and
    # leaves the global NumPy random state untouched.
    rng = np.random.default_rng(42)
    return pd.DataFrame({
        'product': ['A', 'B', 'C', 'D', 'E'] * 20,
        'sales': rng.integers(100, 1000, 100),
        'revenue': rng.uniform(1000, 5000, 100),
        'region': rng.choice(['North', 'South', 'East', 'West'], 100),
    })
|
|
|
|
|
|
|
|
@pytest.fixture
def time_series_data():
    """Create a reproducible 100-day sample time series.

    Columns:
        date: daily timestamps starting at 2024-01-01.
        value: cumulative sum of standard-normal draws, offset by 100.
        sales: random integers drawn from [50, 200).

    Returns:
        pd.DataFrame: the generated time series table.
    """
    # A locally seeded generator keeps the data identical between runs and
    # leaves the global NumPy random state untouched.
    rng = np.random.default_rng(42)
    dates = pd.date_range('2024-01-01', periods=100, freq='D')
    values = np.cumsum(rng.standard_normal(100)) + 100
    return pd.DataFrame({
        'date': dates,
        'value': values,
        'sales': rng.integers(50, 200, 100),
    })
|
|
|
|
|
|
|
|
@pytest.fixture
def anomaly_data():
    """Create a reproducible 100-row sample containing known outliers.

    Columns:
        values: 95 draws from a normal distribution (mean 100, std 10)
            mixed with the five fixed outliers 200, 10, 250, 5 and 220,
            in shuffled order.
        category: random picks from 'A', 'B', 'C'.

    Returns:
        pd.DataFrame: the generated table.
    """
    # A locally seeded generator keeps the data identical between runs and
    # leaves the global NumPy random state untouched.
    rng = np.random.default_rng(42)

    normal = rng.normal(100, 10, 95)
    outliers = np.array([200, 10, 250, 5, 220])
    data = np.concatenate([normal, outliers])
    # Shuffle in place so the outliers are not clustered at the end.
    rng.shuffle(data)

    return pd.DataFrame({
        'values': data,
        'category': rng.choice(['A', 'B', 'C'], 100),
    })
|
|
|
|
|
|
|
|
@pytest.fixture
def correlation_data():
    """Create a reproducible 100-row sample with correlated columns.

    Columns:
        var_x: draws from a normal distribution (mean 50, std 10).
        var_y: 2 * var_x plus normal noise (std 5).
        var_z: -1.5 * var_x plus normal noise (std 8).
        var_w: independent draws from a normal distribution
            (mean 100, std 15).

    Returns:
        pd.DataFrame: the generated table.
    """
    # Use a private seeded generator rather than np.random.seed(), which
    # would reseed the process-wide RNG and make other tests' random data
    # depend on execution order.
    rng = np.random.default_rng(42)
    x = rng.normal(50, 10, 100)
    y = 2 * x + rng.normal(0, 5, 100)
    z = -1.5 * x + rng.normal(0, 8, 100)
    w = rng.normal(100, 15, 100)

    return pd.DataFrame({
        'var_x': x,
        'var_y': y,
        'var_z': z,
        'var_w': w,
    })
|
|
|
|
|
|
|
|
@pytest.fixture
def mixed_data():
    """Create a reproducible 100-row sample with mixed column types.

    Columns:
        numerical: draws from a normal distribution (mean 100, std 15).
        categorical: random picks from 'Cat1', 'Cat2', 'Cat3'.
        date: 100 consecutive timestamps starting at 2024-01-01.
        sales: random integers drawn from [50, 500).

    Returns:
        pd.DataFrame: the generated table.
    """
    # A locally seeded generator keeps the data identical between runs and
    # leaves the global NumPy random state untouched.
    rng = np.random.default_rng(42)
    return pd.DataFrame({
        'numerical': rng.normal(100, 15, 100),
        'categorical': rng.choice(['Cat1', 'Cat2', 'Cat3'], 100),
        'date': pd.date_range('2024-01-01', periods=100),
        'sales': rng.integers(50, 500, 100),
    })
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestTopBottomPerformers:
    """Checks for the TopBottomPerformers insight strategy."""

    def test_initialization(self):
        """The strategy can be constructed without arguments."""
        strategy = TopBottomPerformers()
        assert strategy is not None

    def test_get_insight_type(self):
        """The strategy reports the 'top_bottom_performers' identifier."""
        strategy = TopBottomPerformers()
        reported = strategy.get_insight_type()
        assert reported == "top_bottom_performers"

    def test_generate_simple(self, sales_data):
        """A plain run on one column returns the expected result keys."""
        strategy = TopBottomPerformers()
        outcome = strategy.generate(sales_data, column='sales')

        assert outcome['type'] == 'top_bottom_performers'
        for expected_key in ('top_performers', 'bottom_performers', 'summary'):
            assert expected_key in outcome

    def test_generate_with_groupby(self, sales_data):
        """Grouping and aggregation options are echoed back in the result."""
        strategy = TopBottomPerformers()
        options = {
            'column': 'sales',
            'group_by': 'product',
            'aggregation': 'sum',
        }
        outcome = strategy.generate(sales_data, **options)

        assert outcome['group_by'] == 'product'
        assert outcome['aggregation'] == 'sum'
        assert len(outcome['top_performers']['data']) > 0

    def test_generate_with_custom_n(self, sales_data):
        """Requested top_n/bottom_n bound the number of reported rows."""
        strategy = TopBottomPerformers()
        outcome = strategy.generate(
            sales_data, column='sales', top_n=3, bottom_n=3
        )

        for section in ('top_performers', 'bottom_performers'):
            assert outcome[section]['count'] <= 3

    def test_invalid_column(self, sales_data):
        """An unknown column name is rejected with ValueError."""
        strategy = TopBottomPerformers()
        with pytest.raises(ValueError):
            strategy.generate(sales_data, column='nonexistent')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestTrendAnalysis:
    """Checks for the TrendAnalysis insight strategy."""

    def test_initialization(self):
        """The strategy can be constructed without arguments."""
        analyzer = TrendAnalysis()
        assert analyzer is not None

    def test_get_insight_type(self):
        """The strategy reports the 'trend_analysis' identifier."""
        analyzer = TrendAnalysis()
        reported = analyzer.get_insight_type()
        assert reported == "trend_analysis"

    def test_generate_trend(self, time_series_data):
        """A run on the time series returns the expected top-level keys."""
        analyzer = TrendAnalysis()
        outcome = analyzer.generate(
            time_series_data, date_column='date', value_column='value'
        )

        assert outcome['type'] == 'trend_analysis'
        for expected_key in (
            'trend_direction', 'metrics', 'date_range', 'summary'
        ):
            assert expected_key in outcome

    def test_trend_metrics(self, time_series_data):
        """The 'metrics' entry exposes every expected metric name."""
        analyzer = TrendAnalysis()
        outcome = analyzer.generate(
            time_series_data, date_column='date', value_column='value'
        )

        reported_metrics = outcome['metrics']
        expected_names = (
            'first_value',
            'last_value',
            'absolute_change',
            'percentage_change',
            'growth_rate',
            'volatility',
        )
        for metric_name in expected_names:
            assert metric_name in reported_metrics

    def test_insufficient_data(self):
        """A single-row frame produces a result carrying an 'error' key."""
        single_row = pd.DataFrame({
            'date': [pd.Timestamp('2024-01-01')],
            'value': [100],
        })

        analyzer = TrendAnalysis()
        outcome = analyzer.generate(
            single_row, date_column='date', value_column='value'
        )

        assert 'error' in outcome

    def test_invalid_columns(self, time_series_data):
        """An unknown date column is rejected with ValueError."""
        analyzer = TrendAnalysis()
        with pytest.raises(ValueError):
            analyzer.generate(
                time_series_data,
                date_column='nonexistent',
                value_column='value',
            )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestAnomalyDetection:
    """Checks for the AnomalyDetection insight strategy."""

    def test_initialization(self):
        """The strategy can be constructed without arguments."""
        detector = AnomalyDetection()
        assert detector is not None

    def test_get_insight_type(self):
        """The strategy reports the 'anomaly_detection' identifier."""
        detector = AnomalyDetection()
        reported = detector.get_insight_type()
        assert reported == "anomaly_detection"

    def test_detect_zscore(self, anomaly_data):
        """A Z-score run returns the method name and the expected keys."""
        detector = AnomalyDetection()
        outcome = detector.generate(
            anomaly_data, column='values', method='zscore', threshold=2.5
        )

        assert outcome['type'] == 'anomaly_detection'
        assert outcome['method'] == 'zscore'
        for expected_key in ('statistics', 'anomalies'):
            assert expected_key in outcome

    def test_detect_iqr(self, anomaly_data):
        """An IQR run echoes the method and reports a non-negative count."""
        detector = AnomalyDetection()
        outcome = detector.generate(
            anomaly_data, column='values', method='iqr', threshold=1.5
        )

        assert outcome['method'] == 'iqr'
        assert outcome['statistics']['anomaly_count'] >= 0

    def test_no_anomalies(self):
        """Tightly clustered data with threshold=10 yields zero anomalies."""
        tight_values = np.random.normal(100, 1, 100)
        frame = pd.DataFrame({'values': tight_values})

        detector = AnomalyDetection()
        outcome = detector.generate(frame, column='values', threshold=10)

        assert outcome['statistics']['anomaly_count'] == 0

    def test_non_numerical_column(self, sales_data):
        """A string column produces a result carrying an 'error' key."""
        detector = AnomalyDetection()
        outcome = detector.generate(sales_data, column='product')

        assert 'error' in outcome

    def test_invalid_method(self, anomaly_data):
        """An unknown detection method is rejected with ValueError."""
        detector = AnomalyDetection()
        with pytest.raises(ValueError):
            detector.generate(anomaly_data, column='values', method='invalid')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestDistributionInsights:
    """Checks for the DistributionInsights insight strategy."""

    def test_initialization(self):
        """The strategy can be constructed without arguments."""
        profiler = DistributionInsights()
        assert profiler is not None

    def test_get_insight_type(self):
        """The strategy reports the 'distribution_insights' identifier."""
        profiler = DistributionInsights()
        reported = profiler.get_insight_type()
        assert reported == "distribution_insights"

    def test_numerical_distribution(self, sales_data):
        """An integer column is classified as numerical with shape info."""
        profiler = DistributionInsights()
        outcome = profiler.generate(sales_data, column='sales')

        assert outcome['type'] == 'distribution_insights'
        assert outcome['data_type'] == 'numerical'
        for expected_key in ('statistics', 'distribution_shape'):
            assert expected_key in outcome

    def test_numerical_statistics(self, sales_data):
        """The 'statistics' entry exposes every expected statistic name."""
        profiler = DistributionInsights()
        outcome = profiler.generate(sales_data, column='sales')

        reported_stats = outcome['statistics']
        for stat_name in ('mean', 'median', 'std', 'skewness', 'kurtosis'):
            assert stat_name in reported_stats

    def test_categorical_distribution(self, sales_data):
        """A string column is classified as categorical with value counts."""
        profiler = DistributionInsights()
        outcome = profiler.generate(sales_data, column='product')

        assert outcome['data_type'] == 'categorical'
        assert 'value_counts' in outcome
        assert 'most_common' in outcome['statistics']

    def test_empty_column(self):
        """An all-NaN column produces a result carrying an 'error' key."""
        all_missing = pd.DataFrame({'col': [np.nan, np.nan, np.nan]})

        profiler = DistributionInsights()
        outcome = profiler.generate(all_missing, column='col')

        assert 'error' in outcome
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestCorrelationInsights:
    """Checks for the CorrelationInsights insight strategy."""

    def test_initialization(self):
        """The strategy can be constructed without arguments."""
        correlator = CorrelationInsights()
        assert correlator is not None

    def test_get_insight_type(self):
        """The strategy reports the 'correlation_insights' identifier."""
        correlator = CorrelationInsights()
        reported = correlator.get_insight_type()
        assert reported == "correlation_insights"

    def test_generate_correlations(self, correlation_data):
        """A run at threshold 0.5 returns the expected top-level keys."""
        correlator = CorrelationInsights()
        outcome = correlator.generate(correlation_data, threshold=0.5)

        assert outcome['type'] == 'correlation_insights'
        for expected_key in ('strong_correlations_found', 'correlations'):
            assert expected_key in outcome

    def test_strong_correlations_found(self, correlation_data):
        """At threshold 0.7 at least one strong correlation is reported."""
        correlator = CorrelationInsights()
        outcome = correlator.generate(correlation_data, threshold=0.7)

        assert outcome['strong_correlations_found'] > 0

    def test_correlation_details(self, correlation_data):
        """The first reported correlation carries every detail field.

        The field checks only run when at least one correlation is
        reported; an empty list passes without further assertions.
        """
        correlator = CorrelationInsights()
        outcome = correlator.generate(correlation_data, threshold=0.5)

        if len(outcome['correlations']) > 0:
            first_entry = outcome['correlations'][0]
            detail_fields = (
                'variable1',
                'variable2',
                'correlation',
                'strength',
                'direction',
            )
            for field_name in detail_fields:
                assert field_name in first_entry

    def test_different_methods(self, correlation_data):
        """The requested correlation method is echoed back in the result."""
        correlator = CorrelationInsights()

        # Pearson first, then Spearman, each checked right after its run.
        for method_name in ('pearson', 'spearman'):
            outcome = correlator.generate(correlation_data, method=method_name)
            assert outcome['method'] == method_name

    def test_insufficient_columns(self):
        """A single-column frame produces a result carrying an 'error' key."""
        one_column = pd.DataFrame({'col': [1, 2, 3]})

        correlator = CorrelationInsights()
        outcome = correlator.generate(one_column)

        assert 'error' in outcome
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TestInsightManager:
    """Checks for the InsightManager facade over the insight strategies."""

    def test_initialization(self):
        """A fresh manager exists and holds at least five strategies."""
        mgr = InsightManager()
        assert mgr is not None
        assert len(mgr.strategies) >= 5

    def test_get_available_insights(self):
        """Every built-in insight key is listed as available."""
        mgr = InsightManager()
        listed = mgr.get_available_insights()

        builtin_keys = (
            'top_bottom',
            'trend',
            'anomaly',
            'distribution',
            'correlation',
        )
        for insight_key in builtin_keys:
            assert insight_key in listed

    def test_generate_top_bottom(self, sales_data):
        """The 'top_bottom' key dispatches to the top/bottom strategy."""
        mgr = InsightManager()
        outcome = mgr.generate_insight('top_bottom', sales_data, column='sales')

        assert outcome['type'] == 'top_bottom_performers'

    def test_generate_trend(self, time_series_data):
        """The 'trend' key dispatches to the trend strategy."""
        mgr = InsightManager()
        outcome = mgr.generate_insight(
            'trend', time_series_data, date_column='date', value_column='value'
        )

        assert outcome['type'] == 'trend_analysis'

    def test_generate_anomaly(self, anomaly_data):
        """The 'anomaly' key dispatches to the anomaly strategy."""
        mgr = InsightManager()
        outcome = mgr.generate_insight('anomaly', anomaly_data, column='values')

        assert outcome['type'] == 'anomaly_detection'

    def test_generate_distribution(self, sales_data):
        """The 'distribution' key dispatches to the distribution strategy."""
        mgr = InsightManager()
        outcome = mgr.generate_insight(
            'distribution', sales_data, column='sales'
        )

        assert outcome['type'] == 'distribution_insights'

    def test_generate_correlation(self, correlation_data):
        """The 'correlation' key dispatches to the correlation strategy."""
        mgr = InsightManager()
        outcome = mgr.generate_insight('correlation', correlation_data)

        assert outcome['type'] == 'correlation_insights'

    def test_unsupported_insight_type(self, sales_data):
        """An unknown key raises ValueError with the expected message."""
        mgr = InsightManager()

        with pytest.raises(ValueError, match="Unsupported insight type"):
            mgr.generate_insight('invalid_type', sales_data)

    def test_generate_all_insights(self, mixed_data):
        """Running every insight yields a non-empty dict."""
        mgr = InsightManager()
        everything = mgr.generate_all_insights(mixed_data)

        assert isinstance(everything, dict)
        assert len(everything) > 0

    def test_add_strategy(self):
        """Registering a strategy grows the registry and lists its key."""
        mgr = InsightManager()
        count_before = len(mgr.strategies)

        class MockStrategy(InsightStrategy):
            """Minimal strategy stub used only for registration."""

            def generate(self, df, **kwargs):
                return {'type': 'mock'}

            def get_insight_type(self):
                return 'mock'

        mgr.add_strategy('mock', MockStrategy())

        assert len(mgr.strategies) == count_before + 1
        assert 'mock' in mgr.get_available_insights()

    def test_format_insight_report(self, sales_data):
        """The formatted report is a string with the expected headings."""
        mgr = InsightManager()
        top_bottom_result = mgr.generate_insight(
            'top_bottom', sales_data, column='sales'
        )
        collected = {'top_bottom': top_bottom_result}

        rendered = mgr.format_insight_report(collected)

        assert isinstance(rendered, str)
        for heading in ('INSIGHTS REPORT', 'TOP BOTTOM'):
            assert heading in rendered
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Allow running this module directly: hand this file to pytest with
# verbose output and short tracebacks.
if __name__ == "__main__":
    pytest.main(
        [__file__, "-v", "--tb=short"],
    )