Spaces:
Running
Running
# ReplicaLab training entrypoint for Northflank GPU jobs.
#
# Usage:
#   MODE=train ./scripts/train.sh        # full training (scientist + lab manager), then baseline eval
#   MODE=scientist ./scripts/train.sh    # scientist GRPO only
#   MODE=lab-manager ./scripts/train.sh  # lab manager SFT only
#   MODE=eval ./scripts/train.sh         # baseline evaluation only
#   MODE=server ./scripts/train.sh       # just run server (default)
#
# In the train and eval modes the script starts the ReplicaLab server in the
# background (needed for rollout evaluation) and then runs the requested flow.
# The scientist and lab-manager modes run their training without starting the
# server; the server mode runs only the server, in the foreground.
set -euo pipefail

#######################################
# Check that a setting is a (possibly negative) base-10 integer.
# Arguments: $1 - setting name, used only in the error message
#            $2 - value to check
# Outputs:   an error line on stderr when the value is rejected
# Returns:   0 when the value is an integer, 2 otherwise
#######################################
_require_integer() {
  if [[ ! "$2" =~ ^-?[0-9]+$ ]]; then
    printf '[train.sh] ERROR: %s must be an integer, got: %s\n' "$1" "$2" >&2
    return 2
  fi
}

# Settings come from the environment, with defaults. They are frozen with
# readonly so later code cannot change them by accident mid-run.
readonly MODE="${MODE:-server}"                # which flow to run (see the mode dispatch)
readonly SEED_COUNT="${SEED_COUNT:-8}"         # forwarded to the CLI as --seed-count
readonly MAX_STEPS="${MAX_STEPS:-300}"         # forwarded to scientist-train as --max-steps
readonly MODEL_NAME="${MODEL_NAME:-Qwen/Qwen3.5-9B}"
readonly PERSIST_ROOT="${REPLICALAB_PERSIST_ROOT:-/app/outputs/training}"
readonly BASE_URL="http://localhost:7860"      # where the local server is reached

# Fail fast on malformed numeric settings, before the server is started or
# any training command runs.
_require_integer SEED_COUNT "$SEED_COUNT" || exit 2
_require_integer MAX_STEPS "$MAX_STEPS" || exit 2

# Startup banner echoing the effective configuration.
echo "=========================================="
echo " ReplicaLab Training Pipeline"
echo "=========================================="
echo " Mode: $MODE"
echo " Model: $MODEL_NAME"
echo " Seeds: $SEED_COUNT"
echo " Max steps: $MAX_STEPS"
echo " Persist: $PERSIST_ROOT"
echo " Server URL: $BASE_URL"
echo "=========================================="
# ── Start server in background (needed for eval rollouts) ──────────────
#######################################
# Launch the ReplicaLab server in the background and wait for its /health
# endpoint to answer.
# Globals:   BASE_URL   (read; falls back to http://localhost:7860 if unset)
#            SERVER_PID (written; PID of the background uvicorn process)
# Outputs:   progress lines on stdout, warnings on stderr
# Returns:   0 in every case. Readiness is best-effort: if the server never
#            answers, a warning is printed and the caller continues.
#######################################
start_server() {
  local -r health_url="${BASE_URL:-http://localhost:7860}/health"
  local -r max_attempts=30
  local attempt

  echo "[train.sh] Starting ReplicaLab server on port 7860..."
  uvicorn server.app:app --host 0.0.0.0 --port 7860 &
  SERVER_PID=$!
  echo "[train.sh] Server PID: $SERVER_PID"

  # Poll roughly once per second until the health endpoint answers.
  for ((attempt = 1; attempt <= max_attempts; attempt++)); do
    if curl -sf "$health_url" >/dev/null 2>&1; then
      echo "[train.sh] Server is ready."
      return 0
    fi
    # A process that has already exited will never become ready, so stop
    # polling instead of sleeping through the remaining attempts.
    if ! kill -0 "$SERVER_PID" 2>/dev/null; then
      echo "[train.sh] WARNING: Server process exited before becoming ready, continuing anyway." >&2
      return 0
    fi
    sleep 1
  done
  echo "[train.sh] WARNING: Server did not become ready in ${max_attempts}s, continuing anyway." >&2
}
# ── Scientist GRPO training ─────────────────────────────────────────────
#######################################
# Phase 1: run the scientist preview, then the scientist GRPO training,
# through the replicalab.training.cli module.
# Globals:   PERSIST_ROOT, MODEL_NAME, SEED_COUNT, MAX_STEPS (all read)
# Outputs:   a phase banner and a completion line on stdout, plus whatever
#            the CLI itself prints
#######################################
run_scientist_train() {
  printf '\n%s\n\n' "=== Phase 1: Scientist GRPO Training ==="

  # Options common to both the preview and the training invocation.
  local -a shared_opts=(
    --persist-root "$PERSIST_ROOT"
    --model-name "$MODEL_NAME"
    --seed-count "$SEED_COUNT"
  )

  # Preview comes first (no GPU needed).
  python -m replicalab.training.cli scientist-preview "${shared_opts[@]}"

  # The full training run additionally takes the step budget.
  python -m replicalab.training.cli scientist-train "${shared_opts[@]}" \
    --max-steps "$MAX_STEPS"

  printf '%s\n' "[train.sh] Scientist GRPO training complete."
}
# ── Lab Manager SFT training ────────────────────────────────────────────
#######################################
# Phase 2: run the lab manager preview, then the lab manager SFT training,
# through the replicalab.training.cli module.
# Globals:   PERSIST_ROOT, MODEL_NAME, SEED_COUNT (all read)
# Outputs:   a phase banner and a completion line on stdout, plus whatever
#            the CLI itself prints
#######################################
run_lab_manager_train() {
  printf '\n%s\n\n' "=== Phase 2: Lab Manager SFT Training ==="

  # Both subcommands take exactly the same options.
  local -a cli_opts=(
    --persist-root "$PERSIST_ROOT"
    --model-name "$MODEL_NAME"
    --seed-count "$SEED_COUNT"
  )

  # Preview first, then the full training run.
  local subcommand
  for subcommand in lab-manager-preview lab-manager-train; do
    python -m replicalab.training.cli "$subcommand" "${cli_opts[@]}"
  done

  printf '%s\n' "[train.sh] Lab Manager SFT training complete."
}
# ── Baseline evaluation ─────────────────────────────────────────────────
#######################################
# Run the baseline evaluation through the replicalab.training.cli module,
# pointing it at the server given by BASE_URL.
# Globals:   PERSIST_ROOT, BASE_URL, SEED_COUNT (all read)
# Outputs:   a banner and a completion line on stdout, plus whatever the
#            CLI itself prints
#######################################
run_eval() {
  printf '\n%s\n\n' "=== Baseline Evaluation ==="

  local -a eval_opts=(
    --persist-root "$PERSIST_ROOT"
    --base-url "$BASE_URL"
    --seed-count "$SEED_COUNT"
  )
  python -m replicalab.training.cli baseline-eval "${eval_opts[@]}"

  printf '%s\n' "[train.sh] Evaluation complete."
}
# ── Mode dispatch ───────────────────────────────────────────────────────
#######################################
# Stop the background server if this script started one.
# Globals:   SERVER_PID (read; may be unset when no server was started)
# Returns:   0 always; a PID that is already gone is not an error
#######################################
_stop_server() {
  if [[ -n "${SERVER_PID:-}" ]]; then
    kill "$SERVER_PID" 2>/dev/null || true
  fi
}

# Without this, a phase that fails under `set -e` would end the script and
# leave the background uvicorn process running. The trap does not fire in
# server mode, where `exec` replaces this shell.
trap _stop_server EXIT

# Select the flow requested through MODE.
case "$MODE" in
  server)
    echo "[train.sh] Server-only mode."
    # exec: the server takes over this process and runs in the foreground.
    exec uvicorn server.app:app --host 0.0.0.0 --port 7860
    ;;
  train)
    start_server
    run_scientist_train
    run_lab_manager_train
    run_eval
    echo ""
    echo "=========================================="
    echo " All training complete!"
    echo " Artifacts saved to: $PERSIST_ROOT"
    echo "=========================================="
    # Keep container alive so artifacts can be retrieved: block on the
    # background server until it exits.
    echo "[train.sh] Training done. Keeping container alive..."
    wait "$SERVER_PID"
    ;;
  scientist)
    run_scientist_train
    ;;
  lab-manager)
    run_lab_manager_train
    ;;
  eval)
    start_server
    run_eval
    # As in train mode, block on the background server after the evaluation.
    wait "$SERVER_PID"
    ;;
  *)
    # Diagnostics go to stderr so they do not mix with normal output.
    echo "Unknown MODE: $MODE" >&2
    echo "Valid modes: server, train, scientist, lab-manager, eval" >&2
    exit 1
    ;;
esac