#!/bin/bash
#
# test_models_parallel_gnu.sh - Run multiple ATLAS models in parallel using GNU parallel
#
# This script processes multiple LLM models for ATLAS H→γγ analysis.
# For each model, it runs 5 independent Snakemake workflows in parallel.
#
# Usage:
#   ./test_models_parallel_gnu.sh [output_name] [max_concurrent_models] [tasks_per_model]
#
# Arguments:
#   output_name:           Name of output directory (default: "test")
#   max_concurrent_models: Maximum models to run simultaneously (default: 5)
#   tasks_per_model:       Number of parallel tasks per model (default: 5)
#
# Examples:
#   ./test_models_parallel_gnu.sh                  # Basic usage
#   ./test_models_parallel_gnu.sh experiment1      # Custom output name
#   ./test_models_parallel_gnu.sh test 3 5         # 3 models at once, 5 tasks each
#   ./test_models_parallel_gnu.sh large_test 10 5  # 10 models, 5 tasks each
#
# Requirements:
#   - GNU parallel must be installed
#   - models.txt file with list of models to test
#   - All workflow/*.smk files must be present
#   - Python environment with required packages
#
# Features:
#   - Scales to 20-30 models with 10 jobs each (200-300 total jobs)
#   - Independent task execution - order doesn't matter
#   - Automatic resource management via GNU parallel
#   - Comprehensive error handling and logging
#   - Temporary workspace in /dev/shm for fast I/O
#
# Output Structure:
#   output_name/
#   ├── model1_timestamp1/
#   │   ├── generated_code/
#   │   ├── logs/
#   │   ├── plots/
#   │   ├── prompt_pairs/
#   │   ├── snakemake_log/
#   │   └── stats.csv
#   └── model2_timestamp2/
#       └── ...
# module load python # Save the directory where the script was started ORIG_DIR=$(pwd) # Create a unique random folder in /dev/shm TMPDIR=$(mktemp -d /dev/shm/llm_run_temp_XXXXXX) WORKDIR="$TMPDIR/llm_for_analysis" conda activate llm_env # Get the root of the current Git repository SRC_DIR=$(git rev-parse --show-toplevel) echo "Using Git repository root: $SRC_DIR" # Copy files from the Git repo root, excluding .git, results/, and .snakemake/ rsync -av \ --exclude='.git' \ --exclude='results/' \ --exclude='.snakemake/' \ --exclude='test/' \ "$SRC_DIR/" \ "$WORKDIR/" chmod +x "$WORKDIR/test_stats_parallel.sh" cd "$WORKDIR" mkdir -p results MODEL_LIST="models.txt" OUT_NAME="${1:-test}" # Take from first argument, default to "test" MAX_JOBS="${2:-5}" # Maximum concurrent models (default 5) TASK_JOBS="${3:-5}" # Jobs per model task (default 5) echo "Configuration:" echo " Output directory: $OUT_NAME" echo " Max concurrent models: $MAX_JOBS" echo " Tasks per model: $TASK_JOBS" echo " Total potential jobs: $((MAX_JOBS * TASK_JOBS))" echo "" # Function to process a single model process_model() { local model=$1 local out_name=$2 local src_dir=$3 local work_dir=$4 # Use timestamp for unique run naming local timestamp=$(date +"%Y%m%d_%H%M%S") local MODEL_SAFE="${model//\//_}_$timestamp" export MODEL_NAME="$model" echo "Starting model [$timestamp]: $model" # Create config file for this run cat > config.yml <