ensemble-tts-annotation / scripts /cloud /skypilot_test_minimal.yaml

marcosremar

Fix YAML syntax in SkyPilot test (use heredoc for Python blocks)

a0b0ea2 3 months ago

5.82 kB

	# Minimal SkyPilot test - validates cloud provisioning works
	# Cost: ~$0.10 for 5 minutes
	# This actually provisions a machine and tests the setup

	name: ensemble-test-minimal

	resources:
	use_spot: true

	# Use cheapest GPU available (or CPU if no GPU quota)
	accelerators: {L4:1, T4:1, V100:1} # Try L4 first (cheapest)
	# If no GPU quota, comment above and use CPU:
	# cpus: 2+

	memory: 8+
	disk_size: 50

	setup: \|
	set -e

	echo "=================================================="
	echo "🧪 SKYPILOT MINIMAL TEST"
	echo "=================================================="
	echo ""
	echo "Testing:"
	echo " 1. Machine provisioning ✓"
	echo " 2. Internet connectivity"
	echo " 3. Python environment"
	echo " 4. Git clone"
	echo " 5. Dependencies install"
	echo " 6. Repository structure"
	echo ""

	# Test 1: Machine info
	echo "📊 Machine Info:"
	echo " Hostname: $(hostname)"
	echo " CPU cores: $(nproc)"
	echo " Memory: $(free -h \| grep Mem \| awk '{print $2}')"
	echo " Disk: $(df -h / \| tail -1 \| awk '{print $2}')"

	# Check GPU (if available)
	if command -v nvidia-smi &> /dev/null; then
	echo " GPU: $(nvidia-smi --query-gpu=name --format=csv,noheader)"
	else
	echo " GPU: None (CPU-only)"
	fi
	echo ""

	# Test 2: Internet
	echo "🌐 Testing internet connectivity..."
	curl -s -o /dev/null -w " HTTP Status: %{http_code}\n" https://huggingface.co/
	echo ""

	# Test 3: Python
	echo "🐍 Python environment:"
	python3 --version
	pip3 --version
	echo ""

	# Test 4: Install minimal dependencies
	echo "📦 Installing minimal dependencies..."
	pip install -q numpy soundfile librosa
	echo " ✓ numpy, soundfile, librosa installed"
	echo ""

	# Test 5: Git clone
	echo "📥 Cloning repository..."
	if [ ! -d "ensemble-tts-annotation" ]; then
	git clone -q https://huggingface.co/marcosremar2/ensemble-tts-annotation
	echo " ✓ Repository cloned"
	else
	echo " ✓ Repository already exists"
	fi
	echo ""

	echo "✅ Setup complete!"

	run: \|
	cd ensemble-tts-annotation

	echo ""
	echo "=================================================="
	echo "🧪 RUNNING VALIDATION TESTS"
	echo "=================================================="
	echo ""

	# Test 6: Repository structure
	echo "📁 Validating repository structure..."

	REQUIRED_FILES=(
	"README.md"
	"QUICK_START_SKYPILOT.md"
	"scripts/cloud/skypilot_finetune.yaml"
	"scripts/data/create_synthetic_test_data.py"
	"scripts/test/test_end_to_end.py"
	"ensemble_tts/__init__.py"
	)

	ALL_FOUND=true
	for file in "${REQUIRED_FILES[@]}"; do
	if [ -f "$file" ]; then
	echo " ✓ $file"
	else
	echo " ❌ $file NOT FOUND"
	ALL_FOUND=false
	fi
	done
	echo ""

	if [ "$ALL_FOUND" = false ]; then
	echo "❌ Some files missing!"
	exit 1
	fi

	# Test 7: Python imports
	echo "🐍 Testing Python imports..."
	python3 << 'PYTHON_EOF'
	import sys
	import numpy as np
	import soundfile as sf
	import librosa

	print(' ✓ numpy:', np.__version__)
	print(' ✓ soundfile:', sf.__version__)
	print(' ✓ librosa:', librosa.__version__)

	# Test basic functionality
	audio = np.random.randn(16000)
	print(' ✓ numpy array creation works')

	# Test librosa
	rms = librosa.feature.rms(y=audio)[0].mean()
	print(f' ✓ librosa feature extraction works (RMS: {rms:.4f})')
	PYTHON_EOF
	echo ""

	# Test 8: Synthetic data generation (1 sample)
	echo "🎵 Testing synthetic data generation..."
	python3 scripts/data/create_synthetic_test_data.py \
	--output test_data_tmp \
	--samples 1

	# Check if files created
	AUDIO_COUNT=$(find test_data_tmp -name "*.wav" \| wc -l)
	echo " ✓ Created $AUDIO_COUNT audio files"

	if [ "$AUDIO_COUNT" -ne 7 ]; then
	echo " ❌ Expected 7 files, got $AUDIO_COUNT"
	exit 1
	fi

	# Test audio loading
	FIRST_AUDIO=$(find test_data_tmp -name "*.wav" \| head -1)
	python3 << PYTHON_EOF2
	import soundfile as sf
	import librosa

	audio, sr = sf.read('$FIRST_AUDIO')
	print(f' ✓ Audio loading works: {len(audio)/sr:.1f}s @ {sr}Hz')

	rms = librosa.feature.rms(y=audio)[0].mean()
	print(f' ✓ Feature extraction: RMS={rms:.4f}')
	PYTHON_EOF2
	echo ""

	# Test 9: Voting system
	echo "🗳️ Testing ensemble voting..."
	python3 << 'PYTHON_EOF3'
	import sys
	sys.path.insert(0, '.')

	from ensemble_tts.voting import WeightedVoting

	predictions = [
	{'label': 'happy', 'confidence': 0.85, 'model_name': 'model1', 'model_weight': 0.5},
	{'label': 'happy', 'confidence': 0.75, 'model_name': 'model2', 'model_weight': 0.3},
	{'label': 'neutral', 'confidence': 0.65, 'model_name': 'model3', 'model_weight': 0.2},
	]

	voter = WeightedVoting()
	result = voter.vote(predictions, key='label')

	print(f' ✓ Voting works: {result["label"]} ({result["confidence"]:.2%})')
	PYTHON_EOF3
	echo ""

	# Cleanup
	rm -rf test_data_tmp

	echo "=================================================="
	echo "✅ ALL TESTS PASSED!"
	echo "=================================================="
	echo ""
	echo "📊 Summary:"
	echo " ✓ Machine provisioned successfully"
	echo " ✓ Internet connectivity working"
	echo " ✓ Python environment functional"
	echo " ✓ Repository cloned and validated"
	echo " ✓ Dependencies installed"
	echo " ✓ Synthetic data generation works"
	echo " ✓ Audio processing works"
	echo " ✓ Ensemble voting works"
	echo ""
	echo "🎉 SkyPilot infrastructure validated!"
	echo ""
	echo "💰 Cost: ~$0.10 for this test (5 minutes)"
	echo ""
	echo "📝 Next steps:"
	echo " 1. Fine-tune: sky launch scripts/cloud/skypilot_finetune.yaml"
	echo " 2. Multi-GPU: sky launch scripts/cloud/skypilot_multi_gpu.yaml"
	echo " 3. Annotate: sky launch scripts/cloud/skypilot_annotate_orpheus.yaml"
	echo ""

	num_nodes: 1