import math
import os
import pandas as pd
import random
import shutil
import time

from code_generation import generate_and_debug, prepare_working_folder, code_execution, get_results
from program_database import ProgramsDatabase, ProgramsDatabaseConfig


def get_seed_score(nRMSE, convergence_rate):
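    """Bucket a solver's convergence rate and nRMSE into coarse integer scores.

    Lower nRMSE and a higher convergence rate map to higher bucket values; the
    resulting dict is used as the per-test scores when a program is registered
    in the program database.
    """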
    return {
        'bucketed_convergence_rate': int(max(0, convergence_rate) * 4),
        'bucketed_nRMSE': int(-math.log10(min(1e9, nRMSE)) * 10)
    }


def funsearch(cfg):
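    """Run a FunSearch-style refinement loop over generated PDE solvers.

    The loop has three stages: (1) copy complete seed solvers from an earlier
    repeated-sample run into the working folder and register them in the
    program database, (2) for each search round, sample seed implementations
    from the database, generate and debug a new candidate, and register its
    score, and (3) re-run the best-ranked candidate on the full evaluation
    dataset and write the final metrics to final_result.txt.
    """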
    num_trials = cfg.method.num_debugging_trials_per_sample
    pde_name = cfg.pde.name
    working_folder = cfg.working_folder
    model_name = cfg.model.name
    num_search_rounds = cfg.method.num_search_rounds
    num_initial_seeds = cfg.method.num_initial_seeds
    use_sample_solver_init = cfg.method.use_sample_solver_init
    assert use_sample_solver_init, 'Sample solvers must be enabled for refinement'

    sample_solver_folder = os.path.join(
        'solvers', pde_name, cfg.pde.pde_setting_name, 'seeds'
    )
    sample_solver_info = pd.read_csv(
        os.path.join(sample_solver_folder, 'seed_results.csv')
    )

    prepare_working_folder(
        cfg,
        working_folder=working_folder,
        pde_name=pde_name,
        use_sample_solver_init=use_sample_solver_init
    )
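
    # Program database: stores candidate programs keyed by round index, along
    # with their bucketed scores and the island they were sampled from.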
    pd_cfg = ProgramsDatabaseConfig()
    program_db = ProgramsDatabase(pd_cfg)

    # Initial seeds: reuse solvers from an earlier repeated-sample run
    # (generated without seed programs) archived under archived_logs.
    seed_path = os.path.join(
        '../archived_logs',
        pde_name,
        cfg.pde.pde_setting_name,
        'repeated_sample',
        model_name
    )
    subdirectories = [
        d for d in os.listdir(seed_path)
        if os.path.isdir(os.path.join(seed_path, d))
    ]
    assert len(subdirectories) == 1, 'Only one subdirectory is expected'
    seed_path = os.path.join(seed_path, subdirectories[0])
    result_sheet = pd.read_csv(os.path.join(seed_path, 'test_results.csv'))
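
    # A seed is usable only if all of its artifact files exist and its result
    # row is present and not marked as failed.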
    for i in range(num_initial_seeds):
        relevant_files = [
            'errors_{idx}.txt',
            'implementation_{idx}.py',
            'output_{idx}.txt',
        ]
        complete_seed = True
        for file in relevant_files:
            if not os.path.exists(os.path.join(seed_path, file.format(idx=i))):
                complete_seed = False
                break
        # Guard against a missing result row before indexing into the sheet
        seed_rows = result_sheet[result_sheet['round'] == i]
        if seed_rows.empty:
            complete_seed = False
        else:
            seed_info = [str(x) for x in seed_rows.to_numpy().tolist()[0]]
            if seed_info[1] == 'failed':
                complete_seed = False
        if not complete_seed:
            continue
        # The seed is complete; copy its artifacts to the working folder
        for file in relevant_files:
            source_file = os.path.join(seed_path, file.format(idx=int(i)))
            destination_file = os.path.join(working_folder, file.format(idx=int(i)))
            shutil.copy(source_file, destination_file)
        with open(os.path.join(working_folder, 'test_results.csv'), 'a') as f:
            seed_info[0] = str(int(i))
            f.write(','.join(seed_info) + '\n')
        # Register the seed in the database
        seed_score = get_seed_score(float(seed_info[1]), float(seed_info[3]))
        with open(os.path.join(working_folder, f'implementation_{i}.py'), 'r') as f:
            implementation = f.readlines()
        program_len = len(implementation)
        program_db.register_program(
            program=i,
            program_len=program_len,
            island_id=None,
            scores_per_test=seed_score,
        )
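
    # Search rounds: sample seed implementations from an island of the program
    # database, generate and debug a new candidate, and register its score.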
    for i in range(num_initial_seeds, num_initial_seeds + num_search_rounds):
        island_id, seed_ids = program_db.get_seed()
        try:
            relative_error, elapsed_time, avg_rate = generate_and_debug(
                cfg,
                round_idx=i,
                num_trials=num_trials,
                pde_name=pde_name,
                working_folder=working_folder,
                seed_implementations=seed_ids,
                model_name=model_name
            )
            seed_score = get_seed_score(float(relative_error), float(avg_rate))
            with open(os.path.join(working_folder, f'implementation_{i}.py'), 'r') as f:
                implementation = f.readlines()
            program_len = len(implementation)
            program_db.register_program(
                program=i,
                program_len=program_len,
                island_id=island_id,
                scores_per_test=seed_score,
            )
        except Exception as e:
            print(f'Error in round {i}: {e}. Move on to the next sample.')

    # Finally, rank all programs and report the best one
    results = pd.read_csv(os.path.join(working_folder, 'test_results.csv'))
    keywords = ['nRMSE', 'elapsed_time', 'convergence_rate']
    for keyword in keywords:
        results[keyword] = pd.to_numeric(results[keyword], errors="coerce")
    # Rank by nRMSE (ascending), then elapsed_time (ascending), then convergence_rate (descending)
    sorted_results = results.sort_values(by=keywords, ascending=[True, True, False])
    best_idx = int(sorted_results.head(1)["round"].values[0])
    test_run_id = 999
    shutil.copy(
        os.path.join(working_folder, f'implementation_{best_idx}.py'),
        os.path.join(working_folder, f'implementation_{test_run_id}.py')
    )
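
    # Re-run the best program on the full evaluation dataset (the
    # '_development' suffix is stripped from the configured file name).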
    execution_results = code_execution(
        cfg,
        working_folder=working_folder,
        round_idx=test_run_id,
        pde_name=pde_name,
        eval_dataset=os.path.join(
            cfg.root_dataset_folder,
            cfg.pde.dataset_folder_for_eval.replace('_development.hdf5', '.hdf5')
        )
    )
    if execution_results['exit_code'] != 0:
        relative_error, elapsed_time, avg_rate = None, None, None
    else:
        relative_error, elapsed_time, avg_rate = get_results(
            os.path.join(working_folder, f'output_{test_run_id}.txt')
        )
    with open(os.path.join(working_folder, 'final_result.txt'), 'w') as f:
        f.write('best_idx,relative_error,elapsed_time,avg_rate\n')
        f.write(f'{best_idx},{relative_error},{elapsed_time},{avg_rate}\n')