# SkyPilot task: minimal smoke test of the cloud infrastructure.
# `setup` reports machine info, checks connectivity, installs a few Python
# packages and clones the repository; `run` validates the checkout and
# exercises data generation, audio loading and ensemble voting.

name: ensemble-test-minimal

resources:
  # Spot (preemptible) instances.
  use_spot: true

  # SkyPilot candidate-set syntax: each entry is `<GPU>:<count>`.
  # NOTE(review): per the SkyPilot docs any one of these may be provisioned;
  # do not add spaces after the colons, that would change how it parses.
  accelerators: {L4:1, T4:1, V100:1}

  # Trailing `+` is SkyPilot's "at least" form — confirm units in the
  # SkyPilot resources spec.
  memory: "8+"
  disk_size: 50

setup: |
  # Abort setup on the first failing command.
  set -e

  echo "=================================================="
  echo "🧪 SKYPILOT MINIMAL TEST"
  echo "=================================================="
  echo ""
  echo "Testing:"
  echo " 1. Machine provisioning ✓"
  echo " 2. Internet connectivity"
  echo " 3. Python environment"
  echo " 4. Git clone"
  echo " 5. Dependencies install"
  echo " 6. Repository structure"
  echo ""

  # Machine info
  echo "📊 Machine Info:"
  echo " Hostname: $(hostname)"
  echo " CPU cores: $(nproc)"
  echo " Memory: $(free -h | grep Mem | awk '{print $2}')"
  echo " Disk: $(df -h / | tail -1 | awk '{print $2}')"

  # GPU name(s), if the NVIDIA tooling is present on the machine
  if command -v nvidia-smi &> /dev/null; then
    echo " GPU: $(nvidia-smi --query-gpu=name --format=csv,noheader)"
  else
    echo " GPU: None (CPU-only)"
  fi
  echo ""

  # Internet connectivity. --max-time bounds the probe so a dead network
  # fails the setup quickly instead of hanging; -S surfaces the curl error.
  echo "🌐 Testing internet connectivity..."
  curl -sS --max-time 30 -o /dev/null -w " HTTP Status: %{http_code}\n" https://huggingface.co/
  echo ""

  # Python environment
  echo "🐍 Python environment:"
  python3 --version
  python3 -m pip --version
  echo ""

  # Install through `python3 -m pip` so the packages land in the same
  # interpreter that the `run` section invokes as `python3`.
  echo "📦 Installing minimal dependencies..."
  python3 -m pip install -q numpy soundfile librosa
  echo " ✓ numpy, soundfile, librosa installed"
  echo ""

  # Clone the repository unless a previous setup already did
  echo "📥 Cloning repository..."
  if [ ! -d "ensemble-tts-annotation" ]; then
    git clone -q https://huggingface.co/marcosremar2/ensemble-tts-annotation
    echo " ✓ Repository cloned"
  else
    echo " ✓ Repository already exists"
  fi
  echo ""

  echo "✅ Setup complete!"

run: |
  # Abort on the first failing command; without this a failed `cd`, Python
  # check or data-generation step would still reach "ALL TESTS PASSED".
  set -e

  cd ensemble-tts-annotation

  # Always remove the scratch data directory, on success or failure.
  trap 'rm -rf test_data_tmp' EXIT

  echo ""
  echo "=================================================="
  echo "🧪 RUNNING VALIDATION TESTS"
  echo "=================================================="
  echo ""

  # Test 1: repository structure
  echo "📁 Validating repository structure..."

  REQUIRED_FILES=(
    "README.md"
    "QUICK_START_SKYPILOT.md"
    "scripts/cloud/skypilot_finetune.yaml"
    "scripts/data/create_synthetic_test_data.py"
    "scripts/test/test_end_to_end.py"
    "ensemble_tts/__init__.py"
  )

  # Report every file before failing so all missing ones are listed at once.
  ALL_FOUND=true
  for file in "${REQUIRED_FILES[@]}"; do
    if [ -f "$file" ]; then
      echo " ✓ $file"
    else
      echo " ✗ $file NOT FOUND"
      ALL_FOUND=false
    fi
  done
  echo ""

  if [ "$ALL_FOUND" = false ]; then
    echo "❌ Some files missing!"
    exit 1
  fi

  # Test 2: Python imports and a basic feature extraction
  echo "🐍 Testing Python imports..."
  python3 << 'PYTHON_EOF'
  import sys
  import numpy as np
  import soundfile as sf
  import librosa

  print(' ✓ numpy:', np.__version__)
  print(' ✓ soundfile:', sf.__version__)
  print(' ✓ librosa:', librosa.__version__)

  # Random 16000-sample signal used as a stand-in for audio
  audio = np.random.randn(16000)
  print(' ✓ numpy array creation works')

  # Mean RMS over the frames librosa produces for that signal
  rms = librosa.feature.rms(y=audio)[0].mean()
  print(f' ✓ librosa feature extraction works (RMS: {rms:.4f})')
  PYTHON_EOF
  echo ""

  # Test 3: synthetic data generation. Clear any leftovers from an earlier
  # aborted run first so they cannot skew the file count below.
  echo "🎵 Testing synthetic data generation..."
  rm -rf test_data_tmp
  python3 scripts/data/create_synthetic_test_data.py \
    --output test_data_tmp \
    --samples 1

  # Count the generated .wav files
  AUDIO_COUNT=$(find test_data_tmp -name "*.wav" | wc -l)
  echo " ✓ Created $AUDIO_COUNT audio files"

  if [ "$AUDIO_COUNT" -ne 7 ]; then
    echo " ✗ Expected 7 files, got $AUDIO_COUNT"
    exit 1
  fi

  # Test 4: load one generated file. The path is passed as an argument
  # (sys.argv[1]) rather than spliced into the Python source, so quotes or
  # other special characters in it cannot break the script.
  FIRST_AUDIO=$(find test_data_tmp -name "*.wav" | head -1)
  python3 - "$FIRST_AUDIO" << 'PYTHON_EOF2'
  import sys

  import soundfile as sf
  import librosa

  audio, sr = sf.read(sys.argv[1])
  print(f' ✓ Audio loading works: {len(audio)/sr:.1f}s @ {sr}Hz')

  rms = librosa.feature.rms(y=audio)[0].mean()
  print(f' ✓ Feature extraction: RMS={rms:.4f}')
  PYTHON_EOF2
  echo ""

  # Test 5: ensemble voting on three hand-written predictions
  echo "🗳️ Testing ensemble voting..."
  python3 << 'PYTHON_EOF3'
  import sys
  sys.path.insert(0, '.')

  from ensemble_tts.voting import WeightedVoting

  predictions = [
      {'label': 'happy', 'confidence': 0.85, 'model_name': 'model1', 'model_weight': 0.5},
      {'label': 'happy', 'confidence': 0.75, 'model_name': 'model2', 'model_weight': 0.3},
      {'label': 'neutral', 'confidence': 0.65, 'model_name': 'model3', 'model_weight': 0.2},
  ]

  voter = WeightedVoting()
  result = voter.vote(predictions, key='label')

  print(f' ✓ Voting works: {result["label"]} ({result["confidence"]:.2%})')
  PYTHON_EOF3
  echo ""

  echo "=================================================="
  echo "✅ ALL TESTS PASSED!"
  echo "=================================================="
  echo ""
  echo "📊 Summary:"
  echo " ✓ Machine provisioned successfully"
  echo " ✓ Internet connectivity working"
  echo " ✓ Python environment functional"
  echo " ✓ Repository cloned and validated"
  echo " ✓ Dependencies installed"
  echo " ✓ Synthetic data generation works"
  echo " ✓ Audio processing works"
  echo " ✓ Ensemble voting works"
  echo ""
  echo "🎉 SkyPilot infrastructure validated!"
  echo ""
  # The dollar sign is escaped: inside double quotes a bare $0 would expand
  # to the name of the running shell instead of printing the price.
  echo "💰 Cost: ~\$0.10 for this test (5 minutes)"
  echo ""
  echo "🚀 Next steps:"
  echo " 1. Fine-tune: sky launch scripts/cloud/skypilot_finetune.yaml"
  echo " 2. Multi-GPU: sky launch scripts/cloud/skypilot_multi_gpu.yaml"
  echo " 3. Annotate: sky launch scripts/cloud/skypilot_annotate_orpheus.yaml"
  echo ""

# Single-node task.
num_nodes: 1