Spaces:

CraigRoberts15
/

Business-Intelligence-Dashboard

Running

File size: 14,894 Bytes

c51e926

"""
Unit Tests for Utils Module

Tests all utility functions and classes following best practices.
Uses pytest framework for comprehensive testing.

Author: Craig
Date: December 2024
"""

import pytest
import pandas as pd
import numpy as np
from pathlib import Path
import tempfile
import os

from utils import (
    FileValidator, DataFrameValidator, ColumnValidator,
    format_number, format_percentage, safe_divide,
    get_column_types, detect_date_columns, clean_currency_column,
    truncate_string, get_memory_usage,
    CSVExporter, ExcelExporter, Config
)


# ============================================================================
# FIXTURES
# Reusable test data following DRY principle
# ============================================================================

@pytest.fixture
def sample_dataframe():
    """Provide a four-row DataFrame with integer, string and datetime columns."""
    columns = {
        'age': [25, 30, 35, 40],
        'name': ['Alice', 'Bob', 'Charlie', 'David'],
        'salary': [50000, 60000, 70000, 80000],
        # Four consecutive daily timestamps starting on 2024-01-01.
        'date': pd.date_range('2024-01-01', periods=4),
    }
    return pd.DataFrame(columns)


@pytest.fixture
def empty_dataframe():
    """Provide a DataFrame that has neither rows nor columns."""
    blank_frame = pd.DataFrame()
    return blank_frame


@pytest.fixture
def temp_csv_file():
    """Yield the path of a small CSV file on disk and delete it afterwards.

    The file holds a ``col1,col2`` header followed by two data rows.
    """
    # delete=False keeps the file on disk once the handle is closed, so the
    # test can reopen it by path; removal is handled manually after the yield.
    handle = tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False)
    with handle:
        handle.write('col1,col2\n1,2\n3,4\n')
        csv_path = handle.name

    yield csv_path

    # Teardown: the test itself may already have removed the file.
    if os.path.exists(csv_path):
        os.remove(csv_path)


@pytest.fixture
def temp_xlsx_file():
    """Yield the path of a small Excel workbook and clean it up afterwards.

    The workbook contains two columns (``A`` and ``B``) with two rows each,
    written without the index.

    The file lives inside a private temporary directory instead of a name
    obtained from ``tempfile.mktemp``: ``mktemp`` is deprecated because another
    process can create the returned name before this one does. The directory,
    and the workbook in it, is removed when the fixture is torn down.
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        temp_path = os.path.join(temp_dir, 'sample.xlsx')
        df = pd.DataFrame({'A': [1, 2], 'B': [3, 4]})
        df.to_excel(temp_path, index=False)
        yield temp_path


# ============================================================================
# VALIDATOR TESTS
# ============================================================================

class TestFileValidator:
    """Checks for FileValidator: existing, missing and unsupported files."""

    def test_validate_existing_csv(self, temp_csv_file):
        """An existing CSV file is expected to validate to True."""
        outcome = FileValidator().validate(temp_csv_file)
        assert outcome is True

    def test_validate_existing_xlsx(self, temp_xlsx_file):
        """An existing Excel file is expected to validate to True."""
        outcome = FileValidator().validate(temp_xlsx_file)
        assert outcome is True

    def test_validate_nonexistent_file(self):
        """A path that does not exist is expected to raise FileNotFoundError."""
        checker = FileValidator()
        with pytest.raises(FileNotFoundError):
            checker.validate('nonexistent_file.csv')

    def test_validate_unsupported_format(self):
        """An existing ``.txt`` file is expected to raise ValueError."""
        checker = FileValidator()

        # Create a real (empty) .txt file so the failure is about the
        # extension, not about a missing file.
        handle = tempfile.NamedTemporaryFile(suffix='.txt', delete=False)
        txt_path = handle.name
        handle.close()

        try:
            with pytest.raises(ValueError, match="Unsupported file format"):
                checker.validate(txt_path)
        finally:
            if os.path.exists(txt_path):
                os.remove(txt_path)

    def test_supported_formats(self):
        """SUPPORTED_FORMATS must equal the expected set of extensions."""
        checker = FileValidator()
        assert checker.SUPPORTED_FORMATS == {
            '.csv', '.xlsx', '.xls', '.parquet', '.json', '.tsv',
        }


class TestDataFrameValidator:
    """Checks for DataFrameValidator: valid, empty, None and non-DataFrame input."""

    def test_validate_valid_dataframe(self, sample_dataframe):
        """A populated DataFrame is expected to validate to True."""
        outcome = DataFrameValidator().validate(sample_dataframe)
        assert outcome is True

    def test_validate_empty_dataframe(self, empty_dataframe):
        """An empty DataFrame is expected to raise ValueError."""
        checker = DataFrameValidator()
        with pytest.raises(ValueError, match="DataFrame is empty"):
            checker.validate(empty_dataframe)

    def test_validate_none_dataframe(self):
        """Passing None is expected to raise ValueError."""
        checker = DataFrameValidator()
        with pytest.raises(ValueError, match="DataFrame cannot be None"):
            checker.validate(None)

    def test_validate_wrong_type(self):
        """Passing a plain list is expected to raise ValueError."""
        checker = DataFrameValidator()
        not_a_frame = [1, 2, 3]
        with pytest.raises(ValueError, match="Expected pandas DataFrame"):
            checker.validate(not_a_frame)


class TestColumnValidator:
    """Checks for ColumnValidator: present and missing column names."""

    def test_validate_existing_column(self, sample_dataframe):
        """A single column name that exists is expected to validate to True."""
        outcome = ColumnValidator().validate(sample_dataframe, 'age')
        assert outcome is True

    def test_validate_existing_columns_list(self, sample_dataframe):
        """A list of column names that all exist is expected to validate to True."""
        wanted = ['age', 'name']
        outcome = ColumnValidator().validate(sample_dataframe, wanted)
        assert outcome is True

    def test_validate_missing_column(self, sample_dataframe):
        """A single unknown column name is expected to raise ValueError."""
        checker = ColumnValidator()
        with pytest.raises(ValueError, match="Columns not found"):
            checker.validate(sample_dataframe, 'nonexistent')

    def test_validate_partial_missing_columns(self, sample_dataframe):
        """One unknown name among known ones is expected to raise ValueError."""
        checker = ColumnValidator()
        wanted = ['age', 'nonexistent']
        with pytest.raises(ValueError, match="Columns not found"):
            checker.validate(sample_dataframe, wanted)


# ============================================================================
# FORMATTING FUNCTION TESTS
# ============================================================================

class TestFormatNumber:
    """Checks for format_number: grouping, decimals and missing values."""

    def test_format_integer(self):
        """An integer is rendered with thousands separators."""
        rendered = format_number(1234567)
        assert rendered == "1,234,567"

    def test_format_float(self):
        """A float is rendered with thousands separators and two decimals."""
        rendered = format_number(1234567.89)
        assert rendered == "1,234,567.89"

    def test_format_with_decimals(self):
        """The ``decimals`` keyword controls the number of decimal places."""
        rendered = format_number(1234.5678, decimals=3)
        assert rendered == "1,234.568"

    def test_format_nan(self):
        """NaN is rendered as the placeholder "N/A"."""
        rendered = format_number(np.nan)
        assert rendered == "N/A"

    def test_format_none(self):
        """None is rendered as the placeholder "N/A"."""
        rendered = format_number(None)
        assert rendered == "N/A"


class TestFormatPercentage:
    """Checks for format_percentage: fractions, bounds, NaN and decimals."""

    def test_format_valid_percentage(self):
        """The fraction 0.456 is rendered as "45.60%"."""
        rendered = format_percentage(0.456)
        assert rendered == "45.60%"

    def test_format_zero_percentage(self):
        """Zero is rendered as "0.00%"."""
        rendered = format_percentage(0.0)
        assert rendered == "0.00%"

    def test_format_one_hundred_percent(self):
        """The fraction 1.0 is rendered as "100.00%"."""
        rendered = format_percentage(1.0)
        assert rendered == "100.00%"

    def test_format_nan_percentage(self):
        """NaN is rendered as the placeholder "N/A"."""
        rendered = format_percentage(np.nan)
        assert rendered == "N/A"

    def test_format_custom_decimals(self):
        """The ``decimals`` keyword controls the number of decimal places."""
        rendered = format_percentage(0.12345, decimals=3)
        assert rendered == "12.345%"


class TestSafeDivide:
    """Checks for safe_divide: ordinary division and fallback to ``default``."""

    def test_normal_division(self):
        """Ten divided by two yields 5.0."""
        quotient = safe_divide(10, 2)
        assert quotient == 5.0

    def test_division_by_zero(self):
        """A zero denominator yields the supplied default."""
        quotient = safe_divide(10, 0, default=0.0)
        assert quotient == 0.0

    def test_division_by_nan(self):
        """A NaN denominator yields the supplied default."""
        quotient = safe_divide(10, np.nan, default=-1.0)
        assert quotient == -1.0

    def test_custom_default(self):
        """An arbitrary default value is returned unchanged on division by zero."""
        quotient = safe_divide(10, 0, default=999)
        assert quotient == 999


# ============================================================================
# DATA ANALYSIS FUNCTION TESTS
# ============================================================================

class TestGetColumnTypes:
    """Checks for get_column_types: how columns are grouped by kind."""

    def test_mixed_types(self, sample_dataframe):
        """Each sample column must land in its expected group."""
        grouped = get_column_types(sample_dataframe)
        expectations = (
            ('age', 'numerical'),
            ('salary', 'numerical'),
            ('name', 'categorical'),
            ('date', 'datetime'),
        )
        for column, group in expectations:
            assert column in grouped[group]

    def test_only_numerical(self):
        """An int column and a float column both count as numerical."""
        numbers_only = pd.DataFrame({'a': [1, 2], 'b': [3.0, 4.0]})
        grouped = get_column_types(numbers_only)
        assert len(grouped['numerical']) == 2
        assert len(grouped['categorical']) == 0

    def test_only_categorical(self):
        """Two string columns both count as categorical."""
        text_only = pd.DataFrame({'a': ['x', 'y'], 'b': ['z', 'w']})
        grouped = get_column_types(text_only)
        assert len(grouped['categorical']) == 2
        assert len(grouped['numerical']) == 0


class TestDetectDateColumns:
    """Checks for detect_date_columns: date-like strings versus other data."""

    def test_detect_date_string_column(self):
        """ISO-formatted date strings are detected; arbitrary text is not."""
        frame = pd.DataFrame({
            'date_col': ['2024-01-01', '2024-01-02', '2024-01-03'],
            'text_col': ['abc', 'def', 'ghi'],
        })
        detected = detect_date_columns(frame)
        assert 'date_col' in detected
        assert 'text_col' not in detected

    def test_no_date_columns(self):
        """A frame with only integers and short text yields no date columns."""
        frame = pd.DataFrame({
            'num': [1, 2, 3],
            'text': ['a', 'b', 'c'],
        })
        detected = detect_date_columns(frame)
        assert len(detected) == 0


class TestCleanCurrencyColumn:
    """Checks for clean_currency_column: symbols, whitespace and bad values."""

    def test_clean_dollar_signs(self):
        """Dollar signs and thousands separators are stripped to plain floats."""
        raw = pd.Series(['$1,234.56', '$789.00', '$1,000.00'])
        cleaned = clean_currency_column(raw)
        wanted = pd.Series([1234.56, 789.00, 1000.00])
        pd.testing.assert_series_equal(cleaned, wanted)

    def test_clean_spaces(self):
        """Trailing spaces after the amount do not prevent conversion."""
        raw = pd.Series(['$966 ', '$193 '])
        cleaned = clean_currency_column(raw)
        assert cleaned[0] == 966.0
        assert cleaned[1] == 193.0

    def test_handle_invalid_values(self):
        """An unparseable entry becomes NaN while its neighbours convert."""
        raw = pd.Series(['$100', 'invalid', '$200'])
        cleaned = clean_currency_column(raw)
        assert cleaned[0] == 100.0
        assert pd.isna(cleaned[1])
        assert cleaned[2] == 200.0


class TestTruncateString:
    """Checks for truncate_string: length limit and suffix handling."""

    def test_truncate_long_string(self):
        """An over-long string is cut to exactly ``max_length`` ending in "..."."""
        shortened = truncate_string(
            "This is a very long text that needs truncation",
            max_length=20,
        )
        assert len(shortened) == 20
        assert shortened.endswith("...")

    def test_no_truncation_needed(self):
        """A string shorter than ``max_length`` is returned unchanged."""
        original = "Short text"
        shortened = truncate_string(original, max_length=20)
        assert shortened == original

    def test_custom_suffix(self):
        """A caller-supplied suffix terminates the truncated string."""
        shortened = truncate_string("Long text here", max_length=10, suffix=">>")
        assert shortened.endswith(">>")


class TestGetMemoryUsage:
    """Checks for get_memory_usage: unit label and return type."""

    def test_small_dataframe(self):
        """The report for a three-row frame mentions a byte-based unit."""
        tiny = pd.DataFrame({'a': [1, 2, 3]})
        report = get_memory_usage(tiny)
        # Units are tried in the same order as the original check.
        assert any(unit in report for unit in ('B', 'KB'))

    def test_returns_string(self, sample_dataframe):
        """The report is a ``str`` instance."""
        report = get_memory_usage(sample_dataframe)
        assert isinstance(report, str)


# ============================================================================
# EXPORTER TESTS
# ============================================================================

class TestCSVExporter:
    """Test suite for CSVExporter class."""

    def test_export_csv(self, sample_dataframe):
        """Export the sample DataFrame to CSV and read it back.

        The export target lives in a private temporary directory rather than
        at a name from the deprecated, race-prone ``tempfile.mktemp``. The
        directory is removed automatically when the ``with`` block exits, even
        if an assertion fails.
        """
        exporter = CSVExporter()

        with tempfile.TemporaryDirectory() as temp_dir:
            temp_path = os.path.join(temp_dir, 'export.csv')

            result = exporter.export(sample_dataframe, temp_path)
            assert result is True
            assert os.path.exists(temp_path)

            # Verify content: the round-tripped frame keeps its dimensions.
            df_loaded = pd.read_csv(temp_path)
            assert df_loaded.shape == sample_dataframe.shape


class TestExcelExporter:
    """Test suite for ExcelExporter class."""

    def test_export_excel(self, sample_dataframe):
        """Export the sample DataFrame (minus ``date``) to Excel and read it back.

        The export target lives in a private temporary directory rather than
        at a name from the deprecated, race-prone ``tempfile.mktemp``. The
        directory is removed automatically when the ``with`` block exits, even
        if an assertion fails.
        """
        exporter = ExcelExporter()

        with tempfile.TemporaryDirectory() as temp_dir:
            temp_path = os.path.join(temp_dir, 'export.xlsx')

            # Remove datetime column for Excel compatibility
            df_test = sample_dataframe.drop('date', axis=1)
            result = exporter.export(df_test, temp_path)
            assert result is True
            assert os.path.exists(temp_path)

            # Verify content: the round-tripped frame keeps its dimensions.
            df_loaded = pd.read_excel(temp_path)
            assert df_loaded.shape == df_test.shape


# ============================================================================
# CONFIG TESTS
# ============================================================================

class TestConfig:
    """Checks for Config: required attributes exist and hold sane values."""

    def test_supported_formats_exists(self):
        """SUPPORTED_FILE_FORMATS is defined and non-empty."""
        assert hasattr(Config, 'SUPPORTED_FILE_FORMATS')
        formats = Config.SUPPORTED_FILE_FORMATS
        assert len(formats) > 0

    def test_display_settings_exist(self):
        """All display-related settings are defined on Config."""
        required = (
            'MAX_DISPLAY_ROWS',
            'MAX_STRING_LENGTH',
            'DEFAULT_DECIMAL_PLACES',
        )
        for setting in required:
            assert hasattr(Config, setting)

    def test_config_values_valid(self):
        """Row and string limits are positive; decimal places are non-negative."""
        max_rows = Config.MAX_DISPLAY_ROWS
        assert max_rows > 0
        max_length = Config.MAX_STRING_LENGTH
        assert max_length > 0
        decimal_places = Config.DEFAULT_DECIMAL_PLACES
        assert decimal_places >= 0


# ============================================================================
# RUN TESTS
# ============================================================================

if __name__ == "__main__":
    # Run this file's tests verbosely with short tracebacks. pytest.main()
    # returns an exit code instead of exiting, so pass it to SystemExit;
    # otherwise a direct run would report success (status 0) even when
    # tests fail.
    raise SystemExit(pytest.main([__file__, "-v", "--tb=short"]))