#!/bin/bash
#
# Run the Snakemake workflows for the ATLAS analysis, either all five steps
# in sequence or only the steps selected on the command line.
# Run with --help for the list of options.

# Pull in ~/.apikeys.sh when it exists. Judging by its name it presumably
# defines API keys; what it exports is not visible from this script.
if [[ -f ~/.apikeys.sh ]]; then
  # shellcheck source=/dev/null
  source ~/.apikeys.sh
fi

# Step selection flags. Each stays "false" until the matching --stepN option
# is seen; if none is set, every step is enabled later on.
RUN_STEP1=false
RUN_STEP2=false
RUN_STEP3=false
RUN_STEP4=false
RUN_STEP5=false

# Whether to run check_soln.py after each successful step (--validate).
VALIDATE_STEPS=false

# Default output directory and config file; both can be overridden by options.
OUTPUT_DIR="results"
CONFIG="config.yml"

# Directory the script was started from. A relative config path is resolved
# against it.
PROJECT_ROOT="$(pwd)"

# Print the command-line help text to stdout.
print_usage() {
  printf '%s\n' \
    "Usage: $0 [OPTIONS]" \
    "" \
    "Run Snakemake workflows for ATLAS analysis" \
    "" \
    "Options:" \
    " --step1 Run summarize_root workflow (both rules: data generation + prompt processing)" \
    " --step2 Run create_numpy workflow" \
    " --step3 Run preprocess workflow" \
    " --step4 Run scores workflow" \
    " --step5 Run categorization workflow" \
    " --validate Run validation after each successful step" \
    " --out-dir DIR Custom output directory (default: results)" \
    " --job-id ID Create unique directory: results_job_ID" \
    " --auto-dir Create unique directory with timestamp: results_YYYYMMDD_HHMMSS" \
    " --config FILE Snakemake config file (default: config.yml)" \
    " --help Show this help message" \
    "" \
    "Examples:" \
    " $0 --step1 --auto-dir # results_20250916_143052/" \
    " $0 --step1 --job-id 12345 # results_job_12345/" \
    " $0 --step1 --out-dir my_run_1 # my_run_1/" \
    "" \
    "If no options are provided, all steps are run sequentially."
}

# Exit with status 1 unless an option was followed by a non-empty value.
# Arguments:
#   $1 - option name, used in the error message
#   $2 - number of arguments still unparsed, the option itself included
#   $3 - the candidate value (may be empty)
require_value() {
  if (( $2 < 2 )) || [[ -z "$3" ]]; then
    echo "Option $1 requires a value" >&2
    echo "Use --help for usage information" >&2
    exit 1
  fi
}

# Parse the command-line options passed as arguments.
# Globals written: RUN_STEP1..RUN_STEP5, VALIDATE_STEPS, OUTPUT_DIR, CONFIG,
#                  TIMESTAMP (only for --auto-dir)
# Exits 0 after printing help; exits 1 on an unknown option or when an
# option that takes a value is given none.
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --step1)
        RUN_STEP1=true
        shift
        ;;
      --step2)
        RUN_STEP2=true
        shift
        ;;
      --step3)
        RUN_STEP3=true
        shift
        ;;
      --step4)
        RUN_STEP4=true
        shift
        ;;
      --step5)
        RUN_STEP5=true
        shift
        ;;
      --validate)
        VALIDATE_STEPS=true
        shift
        ;;
      --out-dir)
        require_value "$1" "$#" "${2-}"
        OUTPUT_DIR="$2"
        shift 2
        ;;
      --job-id)
        # The job id only becomes a suffix of the directory name.
        require_value "$1" "$#" "${2-}"
        OUTPUT_DIR="results_job_$2"
        shift 2
        ;;
      --auto-dir)
        # One directory per invocation, named after the current time.
        TIMESTAMP=$(date +"%Y%m%d_%H%M%S")
        OUTPUT_DIR="results_${TIMESTAMP}"
        shift
        ;;
      --config)
        require_value "$1" "$#" "${2-}"
        CONFIG="$2"
        shift 2
        ;;
      --help|-h)
        print_usage
        exit 0
        ;;
      *)
        echo "Unknown option: $1" >&2
        echo "Use --help for usage information" >&2
        exit 1
        ;;
    esac
  done
}

parse_args "$@"

# When no --stepN option was given, every flag is still "false": enable all
# five steps. Either way, announce what is about to run and where.
if [[ "$RUN_STEP1" == "false" && "$RUN_STEP2" == "false" && "$RUN_STEP3" == "false" && "$RUN_STEP4" == "false" && "$RUN_STEP5" == "false" ]]; then
  RUN_STEP1=true
  RUN_STEP2=true
  RUN_STEP3=true
  RUN_STEP4=true
  RUN_STEP5=true
  echo "=== Running All Snakemake Workflows Sequentially (Output: ${OUTPUT_DIR}) ==="
else
  echo "=== Running Selected Snakemake Workflows (Output: ${OUTPUT_DIR}) ==="
fi
echo ""

# Activate the Python environment the workflows run in.
# NOTE(review): `module` and `conda activate` must already be available as
# shell functions in this non-interactive shell - confirm for the target site.
# A failure is reported but not fatal, so a machine where the environment is
# already active keeps working.
module load python \
  || echo "Warning: 'module load python' failed" >&2
conda activate llm_env \
  || echo "Warning: 'conda activate llm_env' failed; continuing with the current Python environment" >&2

# Turn CONFIG into an absolute path. An absolute value is used as given; a
# relative one is taken relative to the directory the script was started from.
# The absolute form is needed because snakemake is later run from inside the
# output directory.
if [[ "${CONFIG}" = /* ]]; then
  CONFIG_PATH="${CONFIG}"
else
  CONFIG_PATH="${PROJECT_ROOT}/${CONFIG}"
fi

# Stop early: nothing below can work without the config file.
if [[ ! -f "${CONFIG_PATH}" ]]; then
  echo "❌ Config file not found at ${CONFIG_PATH}" >&2
  exit 1
fi

# Drop one trailing slash from the output directory so that paths built from
# it do not contain "//". A bare "/" is left alone: stripping it would yield
# an empty string and make every path below point at the filesystem root's
# children under a different name.
if [[ "${OUTPUT_DIR}" != "/" ]]; then
  OUTPUT_DIR="${OUTPUT_DIR%/}"
fi

# BASE_DIR is the absolute form of OUTPUT_DIR; it is what gets substituted
# for the {BASE_DIR} placeholder in the staged files.
if [[ "${OUTPUT_DIR}" = /* ]]; then
  BASE_DIR="${OUTPUT_DIR}"
else
  BASE_DIR="$PWD/${OUTPUT_DIR}"
fi

echo "Preparing workflow files..."

# An empty output directory would make the copies and the `rm -f` calls
# below operate on paths directly under "/".
if [[ -z "${OUTPUT_DIR}" ]]; then
  echo "❌ Output directory is empty" >&2
  exit 1
fi

# Escape the characters that are special in the replacement part of the
# `s#...#...#` commands used below: backslash, `&`, and the `#` delimiter.
sed_escape_replacement() {
  printf '%s' "$1" | sed -e 's/[\\&#]/\\&/g'
}
BASE_DIR_SED="$(sed_escape_replacement "${BASE_DIR}")"
CONFIG_PATH_SED="$(sed_escape_replacement "${CONFIG_PATH}")"

# Work on a private copy of the prompts, with {BASE_DIR} filled in.
if ! mkdir -p "${OUTPUT_DIR}/prompts_temp"; then
  echo "❌ Cannot create output directory ${OUTPUT_DIR}" >&2
  exit 1
fi
cp -r prompts/* "${OUTPUT_DIR}/prompts_temp/"
sed -i "s#{BASE_DIR}#${BASE_DIR_SED}#g" "${OUTPUT_DIR}"/prompts_temp/*.txt

# Stage the Snakemake files under a *_temp.smk name, and the helper scripts
# next to them, so each run uses its own copies.
for staged_workflow in summarize_root create_numpy preprocess scores categorization; do
  cp "workflow/${staged_workflow}.smk" "${OUTPUT_DIR}/${staged_workflow}_temp.smk"
done
for staged_script in supervisor_coder.py write_prompt.py check_soln.py; do
  cp "${staged_script}" "${OUTPUT_DIR}/${staged_script}"
done

# Fill in the {BASE_DIR} and {CONFIG} placeholders in the staged Snakemake
# files (same order as before: BASE_DIR first, then CONFIG).
sed -i \
  -e "s#{BASE_DIR}#${BASE_DIR_SED}#g" \
  -e "s#{CONFIG}#${CONFIG_PATH_SED}#g" \
  "${OUTPUT_DIR}"/*_temp.smk

echo "Copying reference solution arrays for validation..."
mkdir -p "${OUTPUT_DIR}/solution/arrays"

# Remove arrays left from an earlier run before copying the current ones.
rm -f -- "${OUTPUT_DIR}"/solution/arrays/*
cp solution/arrays/* "${OUTPUT_DIR}/solution/arrays/"

# Directories the workflows write into, plus the shared utils module.
mkdir -p "${OUTPUT_DIR}/generated_code" "${OUTPUT_DIR}/logs"
cp utils.py "${OUTPUT_DIR}/generated_code/utils.py"

# Start from a clean slate: drop result/usage arrays of a previous run.
rm -f -- \
  "${OUTPUT_DIR}/logs/success.npy" \
  "${OUTPUT_DIR}/logs/calls.npy" \
  "${OUTPUT_DIR}/logs/input_tokens.npy" \
  "${OUTPUT_DIR}/logs/output_tokens.npy"

echo "Starting sequential execution..."
echo ""

#######################################
# Run one Snakemake target from inside the output directory and, when
# validation is enabled, check the result with check_soln.py.
# Globals (read): OUTPUT_DIR, BASE_DIR, CONFIG_PATH, VALIDATE_STEPS
# Arguments:
#   $1 - workflow name; also names the log file logs/<name>.log
#   $2 - Snakemake file, relative to OUTPUT_DIR
#   $3 - target passed to snakemake --forcerun
#   $4 - step number handed to check_soln.py and used to index success.npy
# Outputs: progress messages on stdout; snakemake output goes to
#          logs/<name>.log, validation output to logs/<name>_validation.log
# Returns: 0 if snakemake succeeded (the validation outcome is only
#          reported, it does not change the status); 1 if the output
#          directory could not be entered or snakemake failed
#######################################
run_workflow() {
  local workflow_name=$1
  local smk_file=$2
  local target=$3
  local step_number=$4
  local log_file="logs/${workflow_name}.log"
  # Absolute path: the working directory is OUTPUT_DIR while this is used,
  # so a path relative to the project root would point at the wrong place.
  local success_file="${BASE_DIR}/logs/success.npy"
  local start_time duration validation_result
  local status=0

  echo "========================================="
  echo "Running: $workflow_name"
  echo "Target: $target"
  echo "Time: $(date)"
  echo "========================================="

  # Snakemake file and logs/ are addressed relative to the output directory.
  if ! pushd "$OUTPUT_DIR" > /dev/null; then
    echo "❌ Failed to cd into $OUTPUT_DIR"
    return 1
  fi

  echo "Command: snakemake -s \"$smk_file\" -j 1 --forcerun \"$target\" --rerun-incomplete --configfile \"${CONFIG_PATH}\" --latency-wait 120 --verbose > ${log_file} 2>&1"
  echo ""

  start_time=$SECONDS

  if snakemake -s "$smk_file" -j 1 --forcerun "$target" --rerun-incomplete --configfile "${CONFIG_PATH}" --latency-wait 120 --verbose > "${log_file}" 2>&1; then
    duration=$((SECONDS - start_time))
    echo ""
    echo "✅ $workflow_name completed successfully in ${duration}s"
    echo ""

    if [[ "$VALIDATE_STEPS" == "true" ]]; then
      echo "Running validation for Step $step_number..."
      if python check_soln.py --out_dir "${BASE_DIR}" --step "$step_number" >> "logs/${workflow_name}_validation.log" 2>&1; then
        echo "✅ Step $step_number validation completed"

        # Entry (step - 1) of success.npy is reported as PASS when it prints
        # as "1". The path and index are passed as arguments so that special
        # characters in the path cannot alter the Python code.
        if [[ -f "${success_file}" ]]; then
          validation_result=$(python -c 'import sys; import numpy as np; print(np.load(sys.argv[1])[int(sys.argv[2]) - 1])' "${success_file}" "$step_number")
          if [[ "$validation_result" == "1" ]]; then
            echo "✅ Step $step_number validation: PASS"
          else
            echo "❌ Step $step_number validation: FAIL"
          fi
        fi
      else
        echo "❌ Step $step_number validation failed to run"
      fi
      echo ""
    fi
  else
    duration=$((SECONDS - start_time))
    echo ""
    echo "❌ $workflow_name failed after ${duration}s"
    echo ""
    status=1
  fi

  popd > /dev/null
  return "$status"
}

# Ordinal shown in front of each progress line; counts only the steps that
# actually run, so it can differ from the step's own number.
step_counter=1

# Each selected step is attempted even when an earlier one failed; the
# return status of run_workflow is deliberately not acted upon here.

if [[ "$RUN_STEP1" == "true" ]]; then
  echo "$step_counter. Running summarize_root workflow (both rules)..."
  # Step 1 consists of two targets from the same Snakemake file; both are
  # validated as step 1.
  run_workflow "summarize_root" "summarize_root_temp.smk" "summarize_root" 1
  run_workflow "insert_root_summary" "summarize_root_temp.smk" "insert_root_summary" 1
  step_counter=$((step_counter + 1))
fi

if [[ "$RUN_STEP2" == "true" ]]; then
  echo "$step_counter. Running create_numpy workflow..."
  run_workflow "create_numpy" "create_numpy_temp.smk" "create_numpy" 2
  step_counter=$((step_counter + 1))
fi

if [[ "$RUN_STEP3" == "true" ]]; then
  echo "$step_counter. Running preprocess workflow..."
  run_workflow "preprocess" "preprocess_temp.smk" "preprocess" 3
  step_counter=$((step_counter + 1))
fi

if [[ "$RUN_STEP4" == "true" ]]; then
  echo "$step_counter. Running scores workflow..."
  run_workflow "scores" "scores_temp.smk" "scores" 4
  step_counter=$((step_counter + 1))
fi

if [[ "$RUN_STEP5" == "true" ]]; then
  echo "$step_counter. Running categorization workflow..."
  run_workflow "categorization" "categorization_temp.smk" "categorization" 5
  step_counter=$((step_counter + 1))
fi

# Tell the user where to find outputs and logs.
echo ""
echo "=== Sequential Execution Complete ==="
echo "Check ${OUTPUT_DIR}/ for output files"
echo "Check ${OUTPUT_DIR}/logs/*.log files for detailed logs"
if [[ "$VALIDATE_STEPS" == "true" ]]; then
  echo "Check ${OUTPUT_DIR}/logs/*_validation.log files for validation results"
fi

# The comprehensive validation (check_soln.py without --step) is only offered
# when all five steps were part of this run.
if [[ "$RUN_STEP1" == "true" && "$RUN_STEP2" == "true" && "$RUN_STEP3" == "true" && "$RUN_STEP4" == "true" && "$RUN_STEP5" == "true" ]]; then
  echo ""
  run_final_validation=true
  if [[ "$VALIDATE_STEPS" == "false" ]]; then
    # Without --validate, ask first; a single y/Y keypress confirms. If no
    # answer can be read, REPLY is empty and the validation is skipped.
    read -p "Run final comprehensive validation? (y/n): " -n 1 -r
    echo ""
    if [[ ! $REPLY =~ ^[Yy]$ ]]; then
      run_final_validation=false
    fi
  fi
  if [[ "$run_final_validation" == "true" ]]; then
    echo "Running final comprehensive validation..."
    python check_soln.py --out_dir "${OUTPUT_DIR}"
  fi
else
  echo ""
  echo "Note: Final comprehensive validation skipped (not all steps were run)"
fi

echo ""

echo -e "Done!\n"