LLM4HEP / run_smk_sequential.sh
ho22joshua's picture
initial commit
cfcbbc8
#!/bin/bash
#
# run_smk_sequential.sh - Run Snakemake workflows one at a time for debugging
#
# This script runs each Snakemake workflow sequentially to observe
# the behavior of prompt scripts, supervisor, and coder in real time.
#
# Usage:
# ./run_smk_sequential.sh # Run all steps
# ./run_smk_sequential.sh --step1 # Run summarize_root (both rules)
# ./run_smk_sequential.sh --step2 # Run create_numpy
# ./run_smk_sequential.sh --step3 # Run preprocess
# ./run_smk_sequential.sh --step4 # Run scores
# ./run_smk_sequential.sh --step5 # Run categorization
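# ./run_smk_sequential.sh --step1 --step3 # Run summarize_root + preprocess
#
# Further options (--validate, --out-dir, --job-id, --auto-dir, --config) are
# also accepted; see the argument parsing below or run with --help.
#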
# Pull in ~/.apikeys.sh when it exists as a regular file; otherwise carry on
# without it. (Presumably it exports API credentials — confirm with the owner.)
if [[ -f ~/.apikeys.sh ]]; then
  . ~/.apikeys.sh
fi
# Defaults for everything the command line can control.
RUN_STEP1=false
RUN_STEP2=false
RUN_STEP3=false
RUN_STEP4=false
RUN_STEP5=false
VALIDATE_STEPS=false
OUTPUT_DIR="results"
CONFIG="config.yml"
# Remember the project root where this script is invoked
PROJECT_ROOT="$(pwd)"

#######################################
# Print the help text to stdout.
#######################################
print_usage() {
  cat <<EOF
Usage: $0 [OPTIONS]

Run Snakemake workflows for ATLAS analysis

Options:
  --step1           Run summarize_root workflow (both rules: data generation + prompt processing)
  --step2           Run create_numpy workflow
  --step3           Run preprocess workflow
  --step4           Run scores workflow
  --step5           Run categorization workflow
  --validate        Run validation after each successful step
  --out-dir DIR     Custom output directory (default: results)
  --job-id ID       Create unique directory: results_job_ID
  --auto-dir        Create unique directory with timestamp: results_YYYYMMDD_HHMMSS
  --config FILE     Config file, absolute or relative to the invocation directory (default: config.yml)
  --help            Show this help message

Examples:
  $0 --step1 --auto-dir           # results_20250916_143052/
  $0 --step1 --job-id 12345       # results_job_12345/
  $0 --step1 --out-dir my_run_1   # my_run_1/

If no options are provided, all steps are run sequentially.
EOF
}

#######################################
# Abort unless the option in $1 is followed by a non-empty value in $2.
# Without this check a trailing "--out-dir" would leave OUTPUT_DIR empty and
# later file operations would be performed relative to "/".
# Arguments: the remaining command line, starting at the option itself.
#######################################
require_option_value() {
  if [[ $# -lt 2 || -z "$2" ]]; then
    echo "Option $1 requires a non-empty argument" >&2
    echo "Use --help for usage information" >&2
    exit 1
  fi
}

#######################################
# Parse the command line into the RUN_STEP*, VALIDATE_STEPS, OUTPUT_DIR and
# CONFIG globals.
# Globals:   RUN_STEP1..RUN_STEP5, VALIDATE_STEPS, OUTPUT_DIR, CONFIG,
#            TIMESTAMP (all written)
# Arguments: the script's command line ("$@")
# Returns:   0 on success; exits 0 after --help and 1 on an unknown option or
#            a missing option value.
#######################################
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --step1)
        RUN_STEP1=true
        shift
        ;;
      --step2)
        RUN_STEP2=true
        shift
        ;;
      --step3)
        RUN_STEP3=true
        shift
        ;;
      --step4)
        RUN_STEP4=true
        shift
        ;;
      --step5)
        RUN_STEP5=true
        shift
        ;;
      --validate)
        VALIDATE_STEPS=true
        shift
        ;;
      --out-dir)
        require_option_value "$@"
        OUTPUT_DIR="$2"
        shift 2
        ;;
      --job-id)
        # Create unique directory based on job ID
        require_option_value "$@"
        OUTPUT_DIR="results_job_$2"
        shift 2
        ;;
      --auto-dir)
        # Create unique directory with timestamp
        TIMESTAMP=$(date +"%Y%m%d_%H%M%S")
        OUTPUT_DIR="results_${TIMESTAMP}"
        shift
        ;;
      --config)
        require_option_value "$@"
        CONFIG="$2"
        shift 2
        ;;
      --help|-h)
        print_usage
        exit 0
        ;;
      *)
        echo "Unknown option: $1"
        echo "Use --help for usage information"
        exit 1
        ;;
    esac
  done
}

parse_args "$@"
# When not a single --stepN flag was given, switch every step on; otherwise
# keep the caller's selection. Either way announce the mode and output dir.
case "${RUN_STEP1}:${RUN_STEP2}:${RUN_STEP3}:${RUN_STEP4}:${RUN_STEP5}" in
  false:false:false:false:false)
    RUN_STEP1=true
    RUN_STEP2=true
    RUN_STEP3=true
    RUN_STEP4=true
    RUN_STEP5=true
    echo "=== Running All Snakemake Workflows Sequentially (Output: ${OUTPUT_DIR}) ==="
    ;;
  *)
    echo "=== Running Selected Snakemake Workflows (Output: ${OUTPUT_DIR}) ==="
    ;;
esac
echo ""
# Set up environment: load the "python" environment module, then activate the
# "llm_env" conda environment for the commands that follow.
# NOTE(review): neither exit status is checked, so the script keeps going even
# if the module or the conda environment could not be activated — confirm that
# this is intended.
module load python
conda activate llm_env
# Turn CONFIG into an absolute CONFIG_PATH so Snakemake can always find the
# file, whatever the working directory is at the time it runs.
case "${CONFIG}" in
  /*)
    # Already absolute: take it as given.
    CONFIG_PATH="${CONFIG}"
    ;;
  *)
    # Relative: anchor it at the directory the script was started from.
    CONFIG_PATH="${PROJECT_ROOT}/${CONFIG}"
    ;;
esac
# Stop right away when the resolved path is not an existing regular file.
if ! [[ -f "${CONFIG_PATH}" ]]; then
  echo "❌ Config file not found at ${CONFIG_PATH}"
  exit 1
fi
# Copy and prepare workflow files.
# First normalise the output directory: strip one trailing slash, then derive
# BASE_DIR as its absolute form (relative paths are anchored at $PWD).
OUTPUT_DIR="${OUTPUT_DIR%/}"
case "${OUTPUT_DIR}" in
  /*) BASE_DIR="${OUTPUT_DIR}" ;;
  *) BASE_DIR="${PWD}/${OUTPUT_DIR}" ;;
esac
# Stage a private copy of the prompts, workflow files and helper scripts in the
# output directory and fill in the {BASE_DIR} / {CONFIG} placeholders.
# All paths are quoted so directories containing spaces or glob characters work.

# An empty OUTPUT_DIR would make every path below resolve under "/" (including
# the rm further down), so refuse to continue in that case.
if [[ -z "${OUTPUT_DIR}" ]]; then
  echo "❌ Output directory is empty; refusing to stage files under /" >&2
  exit 1
fi

echo "Preparing workflow files..."

# Escape the characters that are special on the replacement side of the
# s#...#...# commands below: backslash, '&' and the '#' delimiter itself.
base_dir_sed=$(printf '%s' "${BASE_DIR}" | sed 's/[\\&#]/\\&/g')
config_path_sed=$(printf '%s' "${CONFIG_PATH}" | sed 's/[\\&#]/\\&/g')

mkdir -p -- "${OUTPUT_DIR}/prompts_temp"
cp -r -- prompts/* "${OUTPUT_DIR}/prompts_temp/"
sed -i -e "s#{BASE_DIR}#${base_dir_sed}#g" -- "${OUTPUT_DIR}"/prompts_temp/*.txt

cp -- workflow/summarize_root.smk "${OUTPUT_DIR}/summarize_root_temp.smk"
cp -- workflow/create_numpy.smk "${OUTPUT_DIR}/create_numpy_temp.smk"
cp -- workflow/preprocess.smk "${OUTPUT_DIR}/preprocess_temp.smk"
cp -- workflow/scores.smk "${OUTPUT_DIR}/scores_temp.smk"
cp -- workflow/categorization.smk "${OUTPUT_DIR}/categorization_temp.smk"
cp -- supervisor_coder.py "${OUTPUT_DIR}/supervisor_coder.py"
cp -- write_prompt.py "${OUTPUT_DIR}/write_prompt.py"
cp -- check_soln.py "${OUTPUT_DIR}/check_soln.py"
sed -i -e "s#{BASE_DIR}#${base_dir_sed}#g" -- "${OUTPUT_DIR}"/*_temp.smk
# Replace {CONFIG} in temp snakemake files with the absolute path to the project's config
sed -i -e "s#{CONFIG}#${config_path_sed}#g" -- "${OUTPUT_DIR}"/*_temp.smk

# Copy solutions for validation
echo "Copying reference solution arrays for validation..."
mkdir -p -- "${OUTPUT_DIR}/solution/arrays"
# Remove any existing files first to avoid permission issues
rm -f -- "${OUTPUT_DIR:?}"/solution/arrays/*
cp -- solution/arrays/* "${OUTPUT_DIR}/solution/arrays/"

# Create output directories
mkdir -p -- "${OUTPUT_DIR}/generated_code"
mkdir -p -- "${OUTPUT_DIR}/logs"
cp -- utils.py "${OUTPUT_DIR}/generated_code/utils.py"

# Clean up any existing numpy files (store metrics under logs)
rm -f -- \
  "${OUTPUT_DIR}/logs/success.npy" \
  "${OUTPUT_DIR}/logs/calls.npy" \
  "${OUTPUT_DIR}/logs/input_tokens.npy" \
  "${OUTPUT_DIR}/logs/output_tokens.npy"

echo "Starting sequential execution..."
echo ""
#######################################
# Run one Snakemake target from inside the output directory and, when
# validation is enabled, run check_soln.py for that step afterwards.
# Globals:
#   OUTPUT_DIR     (read) directory entered with pushd for the run
#   BASE_DIR       (read) absolute output directory; passed to check_soln.py
#                         and used to locate logs/success.npy
#   CONFIG_PATH    (read) config file handed to snakemake via --configfile
#   VALIDATE_STEPS (read) "true" enables the per-step validation
# Arguments:
#   $1 - workflow name, used in messages and log file names
#   $2 - Snakemake file, relative to the output directory
#   $3 - target passed to --forcerun
#   $4 - step number passed to check_soln.py; also the 1-based index read
#        from success.npy
# Outputs:
#   Progress messages on stdout. Snakemake output goes to logs/<name>.log and
#   validation output is appended to logs/<name>_validation.log, both inside
#   the output directory.
# Returns:
#   0 when snakemake succeeded (whatever the validation outcome), 1 when the
#   output directory could not be entered or snakemake failed.
#######################################
run_workflow() {
  local workflow_name=$1
  local smk_file=$2
  local target=$3
  local step_number=$4
  local start_time duration validation_result
  # Absolute path: the relative OUTPUT_DIR is no longer valid after pushd.
  local success_file="${BASE_DIR}/logs/success.npy"

  echo "========================================="
  echo "Running: $workflow_name"
  echo "Target: $target"
  echo "Time: $(date)"
  echo "========================================="

  # cd into OUTPUT_DIR and do all the work there
  if ! pushd "$OUTPUT_DIR" > /dev/null; then
    echo "❌ Failed to cd into $OUTPUT_DIR"
    return 1
  fi

  # Print the command that will be executed (run inside ${OUTPUT_DIR}).
  # The earlier variant additionally passed: --stats logs/${workflow_name}.stats
  echo "Command: snakemake -s \"$smk_file\" -j 1 --forcerun \"$target\" --rerun-incomplete --configfile \"${CONFIG_PATH}\" --latency-wait 120 --verbose > logs/${workflow_name}.log 2>&1"
  echo ""

  start_time=$SECONDS

  # CONFIG_PATH is absolute, so Snakemake finds the main config even though
  # the working directory is now the job folder.
  if snakemake -s "$smk_file" -j 1 --forcerun "$target" --rerun-incomplete \
    --configfile "${CONFIG_PATH}" --latency-wait 120 --verbose \
    > "logs/${workflow_name}.log" 2>&1; then
    duration=$((SECONDS - start_time))
    echo ""
    echo "✅ $workflow_name completed successfully in ${duration}s"
    echo ""

    # Run validation for this step if it completed successfully
    if [[ "$VALIDATE_STEPS" == "true" ]]; then
      echo "Running validation for Step $step_number..."
      if python check_soln.py --out_dir "${BASE_DIR}" --step "$step_number" \
        >> "logs/${workflow_name}_validation.log" 2>&1; then
        echo "✅ Step $step_number validation completed"
        # Report PASS/FAIL from the entry for this step in success.npy.
        if [[ -f "$success_file" ]]; then
          # Path and index travel as argv so quotes in the path cannot break
          # the Python snippet.
          validation_result=$(python -c \
            "import sys; import numpy as np; print(np.load(sys.argv[1])[int(sys.argv[2]) - 1])" \
            "$success_file" "$step_number")
          if [[ "$validation_result" == "1" ]]; then
            echo "✅ Step $step_number validation: PASS"
          else
            echo "❌ Step $step_number validation: FAIL"
          fi
        fi
      else
        echo "❌ Step $step_number validation failed to run"
      fi
      echo ""
    fi

    popd > /dev/null
    return 0
  else
    duration=$((SECONDS - start_time))
    echo ""
    echo "❌ $workflow_name failed after ${duration}s"
    echo ""
    popd > /dev/null
    return 1
  fi
}
# Execute the selected steps in pipeline order. step_counter numbers the steps
# that actually run, so the printed ordinals stay contiguous when some steps
# are skipped. The return status of run_workflow is not inspected here.
step_counter=1

if [[ "${RUN_STEP1}" == "true" ]]; then
  printf '%s\n' "${step_counter}. Running summarize_root workflow (both rules)..."
  # Step 1 consists of two rules: summarize_root first, then insert_root_summary.
  run_workflow "summarize_root" "summarize_root_temp.smk" "summarize_root" 1
  run_workflow "insert_root_summary" "summarize_root_temp.smk" "insert_root_summary" 1
  step_counter=$((step_counter + 1))
fi

if [[ "${RUN_STEP2}" == "true" ]]; then
  printf '%s\n' "${step_counter}. Running create_numpy workflow..."
  run_workflow "create_numpy" "create_numpy_temp.smk" "create_numpy" 2
  step_counter=$((step_counter + 1))
fi

if [[ "${RUN_STEP3}" == "true" ]]; then
  printf '%s\n' "${step_counter}. Running preprocess workflow..."
  run_workflow "preprocess" "preprocess_temp.smk" "preprocess" 3
  step_counter=$((step_counter + 1))
fi

if [[ "${RUN_STEP4}" == "true" ]]; then
  printf '%s\n' "${step_counter}. Running scores workflow..."
  run_workflow "scores" "scores_temp.smk" "scores" 4
  step_counter=$((step_counter + 1))
fi

if [[ "${RUN_STEP5}" == "true" ]]; then
  printf '%s\n' "${step_counter}. Running categorization workflow..."
  run_workflow "categorization" "categorization_temp.smk" "categorization" 5
  step_counter=$((step_counter + 1))
fi
# Summarise where the results and logs ended up.
echo ""
echo "=== Sequential Execution Complete ==="
echo "Check ${OUTPUT_DIR}/ for output files"
echo "Check ${OUTPUT_DIR}/logs/*.log files for detailed logs"
if [[ "$VALIDATE_STEPS" == "true" ]]; then
  echo "Check ${OUTPUT_DIR}/logs/*_validation.log files for validation results"
fi

# Optional: Run final comprehensive validation (only if all steps were run).
# With --validate it runs unconditionally; otherwise the user is asked first.
if [[ "$RUN_STEP1" == "true" && "$RUN_STEP2" == "true" && "$RUN_STEP3" == "true" && "$RUN_STEP4" == "true" && "$RUN_STEP5" == "true" ]]; then
  echo ""
  run_final_validation=true
  if [[ "$VALIDATE_STEPS" == "false" ]]; then
    # Single-keystroke prompt; anything other than y/Y (including EOF on a
    # non-interactive stdin) skips the validation.
    read -p "Run final comprehensive validation? (y/n): " -n 1 -r
    echo ""
    if [[ ! $REPLY =~ ^[Yy]$ ]]; then
      run_final_validation=false
    fi
  fi
  if [[ "$run_final_validation" == "true" ]]; then
    echo "Running final comprehensive validation..."
    # Quoted so an output directory containing spaces or glob characters is
    # passed to check_soln.py as a single argument.
    python check_soln.py --out_dir "${OUTPUT_DIR}"
  fi
else
  echo ""
  echo "Note: Final comprehensive validation skipped (not all steps were run)"
fi
# Clean up
printf '\n'
# Removal of the temporary files is currently disabled, so prompts_temp and the
# *_temp.smk files stay available for inspection. The commands are kept here
# for reference:
# echo "Cleaning up temporary files..."
# rm -rf prompts_temp
# rm -f *_temp.smk
# rm -rf .snakemake # Clean up Snakemake's default log directory
printf 'Done!\n\n'