Spaces:

CraigRoberts15
/

Business-Intelligence-Dashboard

Running

App Files Files Community

Business-Intelligence-Dashboard / tests /test_utils.py

CraigRoberts15

Initial commit: Business Intelligence Dashboard with Git LFS

c51e926 9 days ago

raw

history blame

14.9 kB

	"""
	Unit Tests for Utils Module

	Tests all utility functions and classes following best practices.
	Uses pytest framework for comprehensive testing.

	Author: Craig
	Date: December 2024
	"""

	import pytest
	import pandas as pd
	import numpy as np
	from pathlib import Path
	import tempfile
	import os

	from utils import (
	FileValidator, DataFrameValidator, ColumnValidator,
	format_number, format_percentage, safe_divide,
	get_column_types, detect_date_columns, clean_currency_column,
	truncate_string, get_memory_usage,
	CSVExporter, ExcelExporter, Config
	)


	# ============================================================================
	# FIXTURES
	# Reusable test data following DRY principle
	# ============================================================================

	@pytest.fixture
	def sample_dataframe():
	    """Provide a four-row DataFrame with integer, string and datetime columns."""
	    columns = {
	        'age': [25, 30, 35, 40],
	        'name': ['Alice', 'Bob', 'Charlie', 'David'],
	        'salary': [50000, 60000, 70000, 80000],
	        # Four consecutive daily timestamps starting on 2024-01-01.
	        'date': pd.date_range('2024-01-01', periods=4),
	    }
	    return pd.DataFrame(columns)


	@pytest.fixture
	def empty_dataframe():
	    """Provide a DataFrame that has neither rows nor columns."""
	    blank_frame = pd.DataFrame()
	    return blank_frame


	@pytest.fixture
	def temp_csv_file():
	    """Yield the path of a small on-disk CSV file and delete it afterwards.

	    The file holds a ``col1,col2`` header followed by two data rows.
	    """
	    handle = tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False)
	    with handle:
	        handle.write('col1,col2\n1,2\n3,4\n')
	        csv_path = handle.name

	    yield csv_path

	    # Teardown: the file was created with delete=False, so remove it here
	    # unless the test already did.
	    if not os.path.exists(csv_path):
	        return
	    os.remove(csv_path)


	@pytest.fixture
	def temp_xlsx_file():
	    """Yield the path of a small on-disk Excel workbook and delete it afterwards.

	    The workbook contains a 2x2 table with columns ``A`` and ``B``.
	    """
	    # NamedTemporaryFile creates the file atomically and so reserves the
	    # path; the deprecated tempfile.mktemp() only returned an unused *name*,
	    # which another process could claim before we wrote to it.
	    with tempfile.NamedTemporaryFile(suffix='.xlsx', delete=False) as handle:
	        temp_path = handle.name

	    try:
	        df = pd.DataFrame({'A': [1, 2], 'B': [3, 4]})
	        df.to_excel(temp_path, index=False)
	        yield temp_path
	    finally:
	        # Runs even when writing the workbook fails, so no stray file is left.
	        if os.path.exists(temp_path):
	            os.remove(temp_path)


	# ============================================================================
	# VALIDATOR TESTS
	# ============================================================================

	class TestFileValidator:
	    """Test suite for FileValidator class."""

	    def test_validate_existing_csv(self, temp_csv_file):
	        """An existing CSV file validates as True."""
	        validator = FileValidator()
	        assert validator.validate(temp_csv_file) is True

	    def test_validate_existing_xlsx(self, temp_xlsx_file):
	        """An existing Excel file validates as True."""
	        validator = FileValidator()
	        assert validator.validate(temp_xlsx_file) is True

	    def test_validate_nonexistent_file(self):
	        """A path that does not exist raises FileNotFoundError."""
	        validator = FileValidator()
	        # Build the path inside a fresh temporary directory so it is
	        # guaranteed not to exist, whatever the current working directory
	        # happens to contain.
	        with tempfile.TemporaryDirectory() as tmp_dir:
	            missing_path = os.path.join(tmp_dir, 'nonexistent_file.csv')
	            with pytest.raises(FileNotFoundError):
	                validator.validate(missing_path)

	    def test_validate_unsupported_format(self):
	        """An existing file with a ``.txt`` suffix raises ValueError."""
	        validator = FileValidator()
	        # delete=False keeps the file on disk after the handle is closed so
	        # the validator sees a real, existing file.
	        with tempfile.NamedTemporaryFile(suffix='.txt', delete=False) as f:
	            temp_path = f.name

	        try:
	            with pytest.raises(ValueError, match="Unsupported file format"):
	                validator.validate(temp_path)
	        finally:
	            if os.path.exists(temp_path):
	                os.remove(temp_path)

	    def test_supported_formats(self):
	        """SUPPORTED_FORMATS equals exactly the expected set of extensions."""
	        validator = FileValidator()
	        expected_formats = {'.csv', '.xlsx', '.xls', '.parquet', '.json', '.tsv'}
	        assert validator.SUPPORTED_FORMATS == expected_formats


	class TestDataFrameValidator:
	    """Checks for DataFrameValidator.validate on good and bad inputs."""

	    def test_validate_valid_dataframe(self, sample_dataframe):
	        """A populated DataFrame is accepted."""
	        checker = DataFrameValidator()
	        outcome = checker.validate(sample_dataframe)
	        assert outcome is True

	    def test_validate_empty_dataframe(self, empty_dataframe):
	        """An empty DataFrame is rejected with a ValueError."""
	        checker = DataFrameValidator()
	        expected_error = pytest.raises(ValueError, match="DataFrame is empty")
	        with expected_error:
	            checker.validate(empty_dataframe)

	    def test_validate_none_dataframe(self):
	        """None is rejected with a ValueError."""
	        checker = DataFrameValidator()
	        expected_error = pytest.raises(ValueError, match="DataFrame cannot be None")
	        with expected_error:
	            checker.validate(None)

	    def test_validate_wrong_type(self):
	        """A plain list is rejected with a ValueError."""
	        checker = DataFrameValidator()
	        not_a_frame = [1, 2, 3]
	        expected_error = pytest.raises(ValueError, match="Expected pandas DataFrame")
	        with expected_error:
	            checker.validate(not_a_frame)


	class TestColumnValidator:
	    """Checks for ColumnValidator.validate with present and absent columns."""

	    def test_validate_existing_column(self, sample_dataframe):
	        """A single column name that exists is accepted."""
	        checker = ColumnValidator()
	        outcome = checker.validate(sample_dataframe, 'age')
	        assert outcome is True

	    def test_validate_existing_columns_list(self, sample_dataframe):
	        """A list of column names that all exist is accepted."""
	        checker = ColumnValidator()
	        wanted = ['age', 'name']
	        outcome = checker.validate(sample_dataframe, wanted)
	        assert outcome is True

	    def test_validate_missing_column(self, sample_dataframe):
	        """A single unknown column name raises ValueError."""
	        checker = ColumnValidator()
	        expected_error = pytest.raises(ValueError, match="Columns not found")
	        with expected_error:
	            checker.validate(sample_dataframe, 'nonexistent')

	    def test_validate_partial_missing_columns(self, sample_dataframe):
	        """A list mixing known and unknown column names raises ValueError."""
	        checker = ColumnValidator()
	        wanted = ['age', 'nonexistent']
	        expected_error = pytest.raises(ValueError, match="Columns not found")
	        with expected_error:
	            checker.validate(sample_dataframe, wanted)


	# ============================================================================
	# FORMATTING FUNCTION TESTS
	# ============================================================================

	class TestFormatNumber:
	    """Checks for the format_number helper."""

	    def test_format_integer(self):
	        """An integer gets thousands separators."""
	        rendered = format_number(1234567)
	        assert rendered == "1,234,567"

	    def test_format_float(self):
	        """A float gets thousands separators and keeps two decimals."""
	        rendered = format_number(1234567.89)
	        assert rendered == "1,234,567.89"

	    def test_format_with_decimals(self):
	        """An explicit ``decimals`` argument controls the rounding."""
	        rendered = format_number(1234.5678, decimals=3)
	        assert rendered == "1,234.568"

	    def test_format_nan(self):
	        """NaN is rendered as the placeholder string."""
	        rendered = format_number(np.nan)
	        assert rendered == "N/A"

	    def test_format_none(self):
	        """None is rendered as the placeholder string."""
	        rendered = format_number(None)
	        assert rendered == "N/A"


	class TestFormatPercentage:
	    """Checks for the format_percentage helper."""

	    def test_format_valid_percentage(self):
	        """A fraction is rendered as a percentage with two decimals."""
	        rendered = format_percentage(0.456)
	        assert rendered == "45.60%"

	    def test_format_zero_percentage(self):
	        """Zero is rendered as 0.00%."""
	        rendered = format_percentage(0.0)
	        assert rendered == "0.00%"

	    def test_format_one_hundred_percent(self):
	        """One is rendered as 100.00%."""
	        rendered = format_percentage(1.0)
	        assert rendered == "100.00%"

	    def test_format_nan_percentage(self):
	        """NaN is rendered as the placeholder string."""
	        rendered = format_percentage(np.nan)
	        assert rendered == "N/A"

	    def test_format_custom_decimals(self):
	        """An explicit ``decimals`` argument controls the precision."""
	        rendered = format_percentage(0.12345, decimals=3)
	        assert rendered == "12.345%"


	class TestSafeDivide:
	    """Checks for the safe_divide helper."""

	    def test_normal_division(self):
	        """A non-zero denominator gives the ordinary quotient."""
	        quotient = safe_divide(10, 2)
	        assert quotient == 5.0

	    def test_division_by_zero(self):
	        """A zero denominator gives back the supplied default."""
	        quotient = safe_divide(10, 0, default=0.0)
	        assert quotient == 0.0

	    def test_division_by_nan(self):
	        """A NaN denominator gives back the supplied default."""
	        quotient = safe_divide(10, np.nan, default=-1.0)
	        assert quotient == -1.0

	    def test_custom_default(self):
	        """The default is passed through unchanged when dividing by zero."""
	        quotient = safe_divide(10, 0, default=999)
	        assert quotient == 999


	# ============================================================================
	# DATA ANALYSIS FUNCTION TESTS
	# ============================================================================

	class TestGetColumnTypes:
	    """Checks for the get_column_types helper."""

	    def test_mixed_types(self, sample_dataframe):
	        """Each fixture column lands in the expected type group."""
	        grouped = get_column_types(sample_dataframe)
	        numerical = grouped['numerical']
	        assert 'age' in numerical
	        assert 'salary' in numerical
	        assert 'name' in grouped['categorical']
	        assert 'date' in grouped['datetime']

	    def test_only_numerical(self):
	        """Integer and float columns both count as numerical."""
	        frame = pd.DataFrame({'a': [1, 2], 'b': [3.0, 4.0]})
	        grouped = get_column_types(frame)
	        assert len(grouped['numerical']) == 2
	        assert len(grouped['categorical']) == 0

	    def test_only_categorical(self):
	        """String columns count as categorical and none as numerical."""
	        frame = pd.DataFrame({'a': ['x', 'y'], 'b': ['z', 'w']})
	        grouped = get_column_types(frame)
	        assert len(grouped['categorical']) == 2
	        assert len(grouped['numerical']) == 0


	class TestDetectDateColumns:
	    """Checks for the detect_date_columns helper."""

	    def test_detect_date_string_column(self):
	        """ISO-formatted date strings are detected; plain text is not."""
	        data = {
	            'date_col': ['2024-01-01', '2024-01-02', '2024-01-03'],
	            'text_col': ['abc', 'def', 'ghi'],
	        }
	        detected = detect_date_columns(pd.DataFrame(data))
	        assert 'date_col' in detected
	        assert 'text_col' not in detected

	    def test_no_date_columns(self):
	        """A frame with only numbers and letters yields no date columns."""
	        data = {
	            'num': [1, 2, 3],
	            'text': ['a', 'b', 'c'],
	        }
	        detected = detect_date_columns(pd.DataFrame(data))
	        assert len(detected) == 0


	class TestCleanCurrencyColumn:
	    """Checks for the clean_currency_column helper."""

	    def test_clean_dollar_signs(self):
	        """Dollar signs and thousands separators are stripped to floats."""
	        raw = pd.Series(['$1,234.56', '$789.00', '$1,000.00'])
	        wanted = pd.Series([1234.56, 789.00, 1000.00])
	        cleaned = clean_currency_column(raw)
	        pd.testing.assert_series_equal(cleaned, wanted)

	    def test_clean_spaces(self):
	        """Trailing spaces do not prevent conversion."""
	        raw = pd.Series(['$966 ', '$193 '])
	        cleaned = clean_currency_column(raw)
	        assert cleaned[0] == 966.0
	        assert cleaned[1] == 193.0

	    def test_handle_invalid_values(self):
	        """An unparseable entry becomes NaN while its neighbours convert."""
	        raw = pd.Series(['$100', 'invalid', '$200'])
	        cleaned = clean_currency_column(raw)
	        assert cleaned[0] == 100.0
	        assert pd.isna(cleaned[1])
	        assert cleaned[2] == 200.0


	class TestTruncateString:
	    """Checks for the truncate_string helper."""

	    def test_truncate_long_string(self):
	        """An over-long string is cut to max_length and ends with an ellipsis."""
	        original = "This is a very long text that needs truncation"
	        shortened = truncate_string(original, max_length=20)
	        assert len(shortened) == 20
	        assert shortened.endswith("...")

	    def test_no_truncation_needed(self):
	        """A string shorter than max_length is returned unchanged."""
	        original = "Short text"
	        shortened = truncate_string(original, max_length=20)
	        assert shortened == original

	    def test_custom_suffix(self):
	        """A caller-supplied suffix terminates the truncated string."""
	        original = "Long text here"
	        shortened = truncate_string(original, max_length=10, suffix=">>")
	        assert shortened.endswith(">>")


	class TestGetMemoryUsage:
	    """Test suite for get_memory_usage function."""

	    def test_small_dataframe(self):
	        """A three-row frame is reported in bytes or kilobytes."""
	        df = pd.DataFrame({'a': [1, 2, 3]})
	        usage = get_memory_usage(df)
	        # The previous check, `'B' in usage or 'KB' in usage`, had a dead
	        # second clause ('KB' contains 'B') and passed for any larger unit
	        # as well. Require a byte-based unit and reject the larger ones.
	        # NOTE(review): assumes units are spelled KB/MB/GB/TB -- confirm
	        # against get_memory_usage.
	        assert 'B' in usage
	        assert not any(unit in usage for unit in ('MB', 'GB', 'TB'))

	    def test_returns_string(self, sample_dataframe):
	        """The result is a str."""
	        usage = get_memory_usage(sample_dataframe)
	        assert isinstance(usage, str)


	# ============================================================================
	# EXPORTER TESTS
	# ============================================================================

	class TestCSVExporter:
	    """Test suite for CSVExporter class."""

	    def test_export_csv(self, sample_dataframe):
	        """Exporting writes a CSV that reloads with the same shape."""
	        exporter = CSVExporter()
	        # A private temporary directory replaces the deprecated, race-prone
	        # tempfile.mktemp() and is removed automatically, even on failure.
	        with tempfile.TemporaryDirectory() as tmp_dir:
	            temp_path = os.path.join(tmp_dir, 'export.csv')

	            result = exporter.export(sample_dataframe, temp_path)
	            assert result is True
	            assert os.path.exists(temp_path)

	            # Verify content
	            df_loaded = pd.read_csv(temp_path)
	            assert df_loaded.shape == sample_dataframe.shape


	class TestExcelExporter:
	    """Test suite for ExcelExporter class."""

	    def test_export_excel(self, sample_dataframe):
	        """Exporting writes a workbook that reloads with the same shape."""
	        exporter = ExcelExporter()
	        # Remove datetime column for Excel compatibility
	        df_test = sample_dataframe.drop('date', axis=1)

	        # A private temporary directory replaces the deprecated, race-prone
	        # tempfile.mktemp() and is removed automatically, even on failure.
	        with tempfile.TemporaryDirectory() as tmp_dir:
	            temp_path = os.path.join(tmp_dir, 'export.xlsx')

	            result = exporter.export(df_test, temp_path)
	            assert result is True
	            assert os.path.exists(temp_path)

	            # Verify content
	            df_loaded = pd.read_excel(temp_path)
	            assert df_loaded.shape == df_test.shape


	# ============================================================================
	# CONFIG TESTS
	# ============================================================================

	class TestConfig:
	    """Checks that Config exposes the expected settings with sane values."""

	    def test_supported_formats_exists(self):
	        """SUPPORTED_FILE_FORMATS is defined and non-empty."""
	        assert hasattr(Config, 'SUPPORTED_FILE_FORMATS')
	        format_count = len(Config.SUPPORTED_FILE_FORMATS)
	        assert format_count > 0

	    def test_display_settings_exist(self):
	        """Every display-related setting is defined on Config."""
	        required_settings = (
	            'MAX_DISPLAY_ROWS',
	            'MAX_STRING_LENGTH',
	            'DEFAULT_DECIMAL_PLACES',
	        )
	        for setting in required_settings:
	            assert hasattr(Config, setting)

	    def test_config_values_valid(self):
	        """Row and length limits are positive; decimal places are non-negative."""
	        max_rows = Config.MAX_DISPLAY_ROWS
	        assert max_rows > 0
	        max_length = Config.MAX_STRING_LENGTH
	        assert max_length > 0
	        decimal_places = Config.DEFAULT_DECIMAL_PLACES
	        assert decimal_places >= 0


	# ============================================================================
	# RUN TESTS
	# ============================================================================

	if __name__ == "__main__":
	    # pytest.main() returns the session's exit code; hand it to the
	    # interpreter so running this file directly exits non-zero when a
	    # test fails instead of always reporting success.
	    raise SystemExit(pytest.main([__file__, "-v", "--tb=short"]))