# Minimal SkyPilot test - validates cloud provisioning works
# Cost: ~$0.10 for 5 minutes
# This actually provisions a machine and tests the setup

# Task name for this minimal provisioning smoke test.
name: ensemble-test-minimal

resources:
  # Request spot capacity for this short-lived test.
  use_spot: true

  # Candidate GPUs; any one of them is sufficient for this smoke test.
  # NOTE(review): SkyPilot's docs describe the {A:1, B:1} set form as an
  # unordered candidate set resolved by its optimizer, and the [A:1, B:1]
  # list form as an ordered preference. If "try L4 first" is really the
  # intent, the list form is presumably what is wanted - confirm.
  accelerators: {L4:1, T4:1, V100:1}
  # If no GPU quota, comment out the accelerators line above and use CPU:
  # cpus: 2+

  # Minimum memory (the trailing "+" means "at least") and disk size.
  # NOTE(review): units are presumably GB - confirm against SkyPilot docs.
  memory: 8+
  disk_size: 50

# Provisioning-time checks: prints machine info, verifies outbound HTTPS,
# installs the minimal Python dependencies and clones the repository that the
# `run` section validates. Any failing command aborts setup (set -e).
setup: |
  set -e

  echo "=================================================="
  echo "🧪 SKYPILOT MINIMAL TEST"
  echo "=================================================="
  echo ""
  echo "Testing:"
  echo "  1. Machine provisioning ✓"
  echo "  2. Internet connectivity"
  echo "  3. Python environment"
  echo "  4. Dependencies install"
  echo "  5. Git clone"
  echo "  6. Repository structure"
  echo ""

  # Test 1: Machine info
  echo "📊 Machine Info:"
  echo "  Hostname: $(hostname)"
  echo "  CPU cores: $(nproc)"
  echo "  Memory: $(free -h | grep Mem | awk '{print $2}')"
  echo "  Disk: $(df -h / | tail -1 | awk '{print $2}')"

  # Check GPU (if available); a CPU-only machine is still a valid result.
  if command -v nvidia-smi &> /dev/null; then
    echo "  GPU: $(nvidia-smi --query-gpu=name --format=csv,noheader)"
  else
    echo "  GPU: None (CPU-only)"
  fi
  echo ""

  # Test 2: Internet
  # --fail turns HTTP 4xx/5xx into a non-zero exit (so set -e aborts), and
  # --max-time keeps a hung connection from stalling a billed machine.
  echo "🌐 Testing internet connectivity..."
  curl -s --show-error --fail --max-time 30 -o /dev/null \
    -w "  HTTP Status: %{http_code}\n" https://huggingface.co/
  echo ""

  # Test 3: Python
  echo "🐍 Python environment:"
  python3 --version
  python3 -m pip --version
  echo ""

  # Test 4: Install minimal dependencies
  # Install through `python3 -m pip` so the packages land in the same
  # interpreter that the `run` section invokes as `python3`.
  echo "📦 Installing minimal dependencies..."
  python3 -m pip install -q numpy soundfile librosa
  echo "  ✓ numpy, soundfile, librosa installed"
  echo ""

  # Test 5: Git clone (skipped when the checkout already exists, e.g. on a
  # re-launch against the same cluster).
  echo "📥 Cloning repository..."
  if [ ! -d "ensemble-tts-annotation" ]; then
    git clone -q https://huggingface.co/marcosremar2/ensemble-tts-annotation
    echo "  ✓ Repository cloned"
  else
    echo "  ✓ Repository already exists"
  fi
  echo ""

  echo "✅ Setup complete!"

# Validation run: checks the cloned repository layout, the Python imports,
# synthetic data generation, audio loading and the ensemble voting module.
# Exits non-zero on the first failing check.
#
# Heredoc bodies are indented together with the rest of the script so that
# they stay inside the YAML literal block; YAML strips the common indent, so
# bash still sees the heredoc terminators at column 0.
run: |
  # Abort on the first failing command so a broken check can never fall
  # through to the "ALL TESTS PASSED" banner.
  set -e

  cd ensemble-tts-annotation

  echo ""
  echo "=================================================="
  echo "🧪 RUNNING VALIDATION TESTS"
  echo "=================================================="
  echo ""

  # Test 6: Repository structure
  echo "📁 Validating repository structure..."

  REQUIRED_FILES=(
    "README.md"
    "QUICK_START_SKYPILOT.md"
    "scripts/cloud/skypilot_finetune.yaml"
    "scripts/data/create_synthetic_test_data.py"
    "scripts/test/test_end_to_end.py"
    "ensemble_tts/__init__.py"
  )

  # Report every missing file before failing, rather than stopping at the first.
  ALL_FOUND=true
  for file in "${REQUIRED_FILES[@]}"; do
    if [ -f "$file" ]; then
      echo "  ✓ $file"
    else
      echo "  ❌ $file NOT FOUND"
      ALL_FOUND=false
    fi
  done
  echo ""

  if [ "$ALL_FOUND" = false ]; then
    echo "❌ Some files missing!"
    exit 1
  fi

  # Test 7: Python imports
  echo "🐍 Testing Python imports..."
  python3 << 'PYTHON_EOF'
  import numpy as np
  import soundfile as sf
  import librosa

  print('  ✓ numpy:', np.__version__)
  print('  ✓ soundfile:', sf.__version__)
  print('  ✓ librosa:', librosa.__version__)

  # Test basic functionality
  audio = np.random.randn(16000)
  print('  ✓ numpy array creation works')

  # Test librosa
  rms = librosa.feature.rms(y=audio)[0].mean()
  print(f'  ✓ librosa feature extraction works (RMS: {rms:.4f})')
  PYTHON_EOF
  echo ""

  # Test 8: Synthetic data generation (1 sample)
  echo "🎵 Testing synthetic data generation..."
  # Drop leftovers from an earlier aborted run so the file count below is exact.
  rm -rf test_data_tmp
  python3 scripts/data/create_synthetic_test_data.py \
    --output test_data_tmp \
    --samples 1

  # Check if files created
  AUDIO_COUNT=$(find test_data_tmp -name "*.wav" | wc -l)
  echo "  ✓ Created $AUDIO_COUNT audio files"

  # NOTE(review): 7 is presumably one file per class produced by the generator
  # with --samples 1 - confirm against create_synthetic_test_data.py.
  if [ "$AUDIO_COUNT" -ne 7 ]; then
    echo "  ❌ Expected 7 files, got $AUDIO_COUNT"
    exit 1
  fi

  # Test audio loading
  # The path is passed as an argument (not spliced into the Python source), so
  # quotes or other special characters in it cannot break the script.
  FIRST_AUDIO=$(find test_data_tmp -name "*.wav" | head -1)
  python3 - "$FIRST_AUDIO" << 'PYTHON_EOF2'
  import sys

  import soundfile as sf
  import librosa

  audio, sr = sf.read(sys.argv[1])
  print(f'  ✓ Audio loading works: {len(audio)/sr:.1f}s @ {sr}Hz')

  rms = librosa.feature.rms(y=audio)[0].mean()
  print(f'  ✓ Feature extraction: RMS={rms:.4f}')
  PYTHON_EOF2
  echo ""

  # Test 9: Voting system
  echo "🗳️  Testing ensemble voting..."
  python3 << 'PYTHON_EOF3'
  import sys
  sys.path.insert(0, '.')

  from ensemble_tts.voting import WeightedVoting

  predictions = [
      {'label': 'happy', 'confidence': 0.85, 'model_name': 'model1', 'model_weight': 0.5},
      {'label': 'happy', 'confidence': 0.75, 'model_name': 'model2', 'model_weight': 0.3},
      {'label': 'neutral', 'confidence': 0.65, 'model_name': 'model3', 'model_weight': 0.2},
  ]

  voter = WeightedVoting()
  result = voter.vote(predictions, key='label')

  print(f'  ✓ Voting works: {result["label"]} ({result["confidence"]:.2%})')
  PYTHON_EOF3
  echo ""

  # Cleanup
  rm -rf test_data_tmp

  echo "=================================================="
  echo "✅ ALL TESTS PASSED!"
  echo "=================================================="
  echo ""
  echo "📊 Summary:"
  echo "  ✓ Machine provisioned successfully"
  echo "  ✓ Internet connectivity working"
  echo "  ✓ Python environment functional"
  echo "  ✓ Repository cloned and validated"
  echo "  ✓ Dependencies installed"
  echo "  ✓ Synthetic data generation works"
  echo "  ✓ Audio processing works"
  echo "  ✓ Ensemble voting works"
  echo ""
  echo "🎉 SkyPilot infrastructure validated!"
  echo ""
  # Single quotes: inside double quotes "$0" would expand to the shell name.
  echo '💰 Cost: ~$0.10 for this test (5 minutes)'
  echo ""
  echo "📝 Next steps:"
  echo "  1. Fine-tune: sky launch scripts/cloud/skypilot_finetune.yaml"
  echo "  2. Multi-GPU: sky launch scripts/cloud/skypilot_multi_gpu.yaml"
  echo "  3. Annotate: sky launch scripts/cloud/skypilot_annotate_orpheus.yaml"
  echo ""

# The test runs on a single node.
num_nodes: 1