ensemble-tts-annotation / scripts /cloud /skypilot_test_minimal.yaml
marcosremar
Fix YAML syntax in SkyPilot test (use heredoc for Python blocks)
a0b0ea2
# Minimal SkyPilot test - validates cloud provisioning works
# Cost: ~$0.10 for 5 minutes
# This actually provisions a machine and tests the setup
# Task name.
name: ensemble-test-minimal

# Hardware request for the single test machine.
resources:
  # Request spot capacity to keep this short smoke test cheap.
  use_spot: true
  # Use cheapest GPU available (or CPU if no GPU quota).
  # The braces make this an unordered *set* of candidates (one GPU each):
  # SkyPilot optimizes over all of them together and tries the lowest-cost
  # option first. To force a fixed try-order instead, use the list form:
  #   accelerators: [L4:1, T4:1, V100:1]
  accelerators: {L4:1, T4:1, V100:1}
  # If no GPU quota, comment out `accelerators` above and use CPU instead:
  # cpus: 2+
  # The trailing '+' means "at least this much".
  memory: 8+
  disk_size: 50
setup: |
  # Setup phase: report machine details, check connectivity and the Python
  # toolchain, install the minimal Python dependencies and clone the repo.
  # Abort on the first failing command so a broken setup is never reported
  # as complete.
  set -e

  echo "=================================================="
  echo "πŸ§ͺ SKYPILOT MINIMAL TEST"
  echo "=================================================="
  echo ""
  echo "Testing:"
  echo " 1. Machine provisioning βœ“"
  echo " 2. Internet connectivity"
  echo " 3. Python environment"
  echo " 4. Git clone"
  echo " 5. Dependencies install"
  echo " 6. Repository structure"
  echo ""

  # Machine info
  echo "πŸ“Š Machine Info:"
  echo " Hostname: $(hostname)"
  echo " CPU cores: $(nproc)"
  echo " Memory: $(free -h | grep Mem | awk '{print $2}')"
  echo " Disk: $(df -h / | tail -1 | awk '{print $2}')"

  # GPU name, if an NVIDIA driver is installed
  if command -v nvidia-smi > /dev/null 2>&1; then
    echo " GPU: $(nvidia-smi --query-gpu=name --format=csv,noheader)"
  else
    echo " GPU: None (CPU-only)"
  fi
  echo ""

  # Internet connectivity.
  # -sS keeps the progress meter quiet but still prints the error message if
  # the request fails; --max-time stops a dead network from hanging setup.
  echo "🌐 Testing internet connectivity..."
  curl -sS --max-time 30 -o /dev/null -w " HTTP Status: %{http_code}\n" https://huggingface.co/
  echo ""

  # Python toolchain.
  # pip is always invoked as `python3 -m pip` so that the version reported
  # and the packages installed belong to the same interpreter (`python3`)
  # that the run phase uses.
  echo "🐍 Python environment:"
  python3 --version
  python3 -m pip --version
  echo ""

  # Minimal dependencies needed by the validation tests
  echo "πŸ“¦ Installing minimal dependencies..."
  python3 -m pip install -q numpy soundfile librosa
  echo " βœ“ numpy, soundfile, librosa installed"
  echo ""

  # Clone the repository unless a previous setup already did
  echo "πŸ“₯ Cloning repository..."
  if [ ! -d "ensemble-tts-annotation" ]; then
    git clone -q https://huggingface.co/marcosremar2/ensemble-tts-annotation
    echo " βœ“ Repository cloned"
  else
    echo " βœ“ Repository already exists"
  fi
  echo ""
  echo "βœ… Setup complete!"
run: |
  # Run phase: validate the cloned repository (file layout, Python imports,
  # synthetic data generation, audio loading and ensemble voting).
  #
  # Abort on the first failing command. Without this, a failing Python check
  # would be ignored and the script would still print the "ALL TESTS PASSED"
  # banner and exit 0.
  set -e

  cd ensemble-tts-annotation
  echo ""
  echo "=================================================="
  echo "πŸ§ͺ RUNNING VALIDATION TESTS"
  echo "=================================================="
  echo ""

  # Test 6: Repository structure
  echo "πŸ“ Validating repository structure..."
  REQUIRED_FILES=(
    "README.md"
    "QUICK_START_SKYPILOT.md"
    "scripts/cloud/skypilot_finetune.yaml"
    "scripts/data/create_synthetic_test_data.py"
    "scripts/test/test_end_to_end.py"
    "ensemble_tts/__init__.py"
  )
  # Report every missing file before failing, rather than stopping at the first.
  ALL_FOUND=true
  for file in "${REQUIRED_FILES[@]}"; do
    if [ -f "$file" ]; then
      echo " βœ“ $file"
    else
      echo " ❌ $file NOT FOUND"
      ALL_FOUND=false
    fi
  done
  echo ""
  if [ "$ALL_FOUND" = false ]; then
    echo "❌ Some files missing!"
    exit 1
  fi

  # Test 7: Python imports
  # Quoted heredoc delimiter: the shell passes the Python source through
  # verbatim, with no variable or command expansion.
  echo "🐍 Testing Python imports..."
  python3 << 'PYTHON_EOF'
  import numpy as np
  import soundfile as sf
  import librosa
  print(' βœ“ numpy:', np.__version__)
  print(' βœ“ soundfile:', sf.__version__)
  print(' βœ“ librosa:', librosa.__version__)
  # Test basic functionality
  audio = np.random.randn(16000)
  print(' βœ“ numpy array creation works')
  # Test librosa
  rms = librosa.feature.rms(y=audio)[0].mean()
  print(f' βœ“ librosa feature extraction works (RMS: {rms:.4f})')
  PYTHON_EOF
  echo ""

  # Test 8: Synthetic data generation (1 sample)
  echo "🎡 Testing synthetic data generation..."
  python3 scripts/data/create_synthetic_test_data.py \
    --output test_data_tmp \
    --samples 1

  # Check that the expected number of .wav files was created.
  # NOTE(review): 7 is presumably the number of files the generator writes
  # for --samples 1; confirm against create_synthetic_test_data.py.
  AUDIO_COUNT=$(find test_data_tmp -name "*.wav" | wc -l)
  echo " βœ“ Created $AUDIO_COUNT audio files"
  if [ "$AUDIO_COUNT" -ne 7 ]; then
    echo " ❌ Expected 7 files, got $AUDIO_COUNT"
    exit 1
  fi

  # Test audio loading.
  # The path is handed to Python as an argument (`python3 -` reads the program
  # from stdin and exposes the argument as sys.argv[1]) instead of being
  # spliced into the Python source, so quotes or backslashes in the path
  # cannot break or alter the program.
  FIRST_AUDIO=$(find test_data_tmp -name "*.wav" | head -1)
  python3 - "$FIRST_AUDIO" << 'PYTHON_EOF2'
  import sys
  import soundfile as sf
  import librosa
  audio, sr = sf.read(sys.argv[1])
  print(f' βœ“ Audio loading works: {len(audio)/sr:.1f}s @ {sr}Hz')
  rms = librosa.feature.rms(y=audio)[0].mean()
  print(f' βœ“ Feature extraction: RMS={rms:.4f}')
  PYTHON_EOF2
  echo ""

  # Test 9: Voting system
  echo "πŸ—³οΈ Testing ensemble voting..."
  python3 << 'PYTHON_EOF3'
  import sys
  # Make the repository root importable so `ensemble_tts` resolves.
  sys.path.insert(0, '.')
  from ensemble_tts.voting import WeightedVoting
  predictions = [
      {'label': 'happy', 'confidence': 0.85, 'model_name': 'model1', 'model_weight': 0.5},
      {'label': 'happy', 'confidence': 0.75, 'model_name': 'model2', 'model_weight': 0.3},
      {'label': 'neutral', 'confidence': 0.65, 'model_name': 'model3', 'model_weight': 0.2},
  ]
  voter = WeightedVoting()
  result = voter.vote(predictions, key='label')
  print(f' βœ“ Voting works: {result["label"]} ({result["confidence"]:.2%})')
  PYTHON_EOF3
  echo ""

  # Cleanup
  rm -rf test_data_tmp

  echo "=================================================="
  echo "βœ… ALL TESTS PASSED!"
  echo "=================================================="
  echo ""
  echo "πŸ“Š Summary:"
  echo " βœ“ Machine provisioned successfully"
  echo " βœ“ Internet connectivity working"
  echo " βœ“ Python environment functional"
  echo " βœ“ Repository cloned and validated"
  echo " βœ“ Dependencies installed"
  echo " βœ“ Synthetic data generation works"
  echo " βœ“ Audio processing works"
  echo " βœ“ Ensemble voting works"
  echo ""
  echo "πŸŽ‰ SkyPilot infrastructure validated!"
  echo ""
  # Single quotes: inside double quotes the shell would expand "$0" (the
  # shell/script name) and print e.g. "~bash.10" instead of "~$0.10".
  echo 'πŸ’° Cost: ~$0.10 for this test (5 minutes)'
  echo ""
  echo "πŸ“ Next steps:"
  echo " 1. Fine-tune: sky launch scripts/cloud/skypilot_finetune.yaml"
  echo " 2. Multi-GPU: sky launch scripts/cloud/skypilot_multi_gpu.yaml"
  echo " 3. Annotate: sky launch scripts/cloud/skypilot_annotate_orpheus.yaml"
  echo ""
# Provision exactly one node for this test.
num_nodes: 1