import math
import os
import shutil

import pandas as pd

from code_generation import generate_and_debug, prepare_working_folder, code_execution, get_results
from program_database import ProgramsDatabase, ProgramsDatabaseConfig


def get_seed_score(nRMSE, convergence_rate):
    """Bucket raw metrics into the discrete scores used by the program database.

    The convergence rate is clipped at zero and split into quarter-unit
    buckets; nRMSE is placed on a log scale in tenth-of-a-decade buckets.
    nRMSE is clamped to [1e-12, 1e9]: the upper bound caps blow-ups as in the
    original expression, the lower bound is an added guard so a perfect score
    cannot trigger a math.log10(0) domain error.
    """
    clamped_nRMSE = min(1e9, max(nRMSE, 1e-12))
    return {
        'bucketed_convergence_rate': int(max(0, convergence_rate) * 4),
        'bucketed_nRMSE': int(-math.log10(clamped_nRMSE) * 10),
    }
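
# Example (hypothetical values): get_seed_score(nRMSE=1e-3, convergence_rate=1.7)
# returns {'bucketed_convergence_rate': 6, 'bucketed_nRMSE': 30}.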

def funsearch(cfg):
    num_trials = cfg.method.num_debugging_trials_per_sample
    pde_name = cfg.pde.name
    working_folder = cfg.working_folder
    model_name = cfg.model.name
    num_search_rounds = cfg.method.num_search_rounds
    num_initial_seeds = cfg.method.num_initial_seeds
    use_sample_solver_init = cfg.method.use_sample_solver_init
    assert use_sample_solver_init, 'Sample solvers must be enabled for refinement'

    sample_solver_folder = os.path.join(
        'solvers', pde_name, cfg.pde.pde_setting_name, 'seeds'
    )
    sample_solver_info = pd.read_csv(
        os.path.join(sample_solver_folder, 'seed_results.csv')
    )

    prepare_working_folder(
        cfg, 
        working_folder=working_folder, 
        pde_name=pde_name,
        use_sample_solver_init=use_sample_solver_init
    )

    pd_cfg = ProgramsDatabaseConfig()
    program_db = ProgramsDatabase(pd_cfg)
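    # The database keeps candidate programs on independent islands and hands
    # back (island_id, seed_ids) pairs for prompting, FunSearch-style; the
    # default ProgramsDatabaseConfig is used as-is here.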

    # Seeding phase: import solvers that were generated without seeds by the
    # archived repeated-sample run for this PDE and model.
    seed_path = os.path.join(
        '../archived_logs', 
        pde_name, 
        cfg.pde.pde_setting_name,
        'repeated_sample',
        model_name
    )
    subdirectories = [d for d in os.listdir(seed_path) if os.path.isdir(os.path.join(seed_path, d))]
    assert len(subdirectories) == 1, 'Only one subdirectory is expected'
    seed_path = os.path.join(seed_path, subdirectories[0])
    result_sheet = pd.read_csv(os.path.join(seed_path, 'test_results.csv'))

    # A seed is reused only when every artifact of the archived run exists
    # and its recorded result is valid.
    relevant_files = [
        'errors_{idx}.txt',
        'implementation_{idx}.py',
        'output_{idx}.txt',
    ]
    for i in range(num_initial_seeds):
        complete_seed = True
        for file in relevant_files:
            if not os.path.exists(os.path.join(seed_path, file.format(idx=i))):
                complete_seed = False
                break

        seed_rows = result_sheet[result_sheet['round'] == i]
        if seed_rows.empty:
            complete_seed = False
        else:
            seed_info = [str(x) for x in seed_rows.to_numpy().tolist()[0]]
            if seed_info[1] == 'failed':
                complete_seed = False

        if not complete_seed:
            continue

        # The seed is complete; copy its artifacts into the working folder
        # and append its recorded metrics to the running results sheet.
        for file in relevant_files:
            source_file = os.path.join(seed_path, file.format(idx=i))
            destination_file = os.path.join(working_folder, file.format(idx=i))
            shutil.copy(source_file, destination_file)
        with open(os.path.join(working_folder, 'test_results.csv'), 'a') as f:
            seed_info[0] = str(i)
            f.write(','.join(seed_info) + '\n')
        
        # Register the seed in the database. Result-sheet columns are
        # (round, nRMSE, elapsed_time, convergence_rate).
        seed_score = get_seed_score(float(seed_info[1]), float(seed_info[3]))
        with open(os.path.join(working_folder, f'implementation_{i}.py'), 'r') as f:
            program_len = len(f.readlines())
        program_db.register_program(
            program=i,
            program_len=program_len,
            island_id=None,
            scores_per_test=seed_score,
        )

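    # Search phase: each round samples seed implementations from an island of
    # the program database, prompts the model to generate and debug a new
    # solver from them, and registers the scored result on the same island.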
    for i in range(num_initial_seeds, num_initial_seeds+num_search_rounds):
        island_id, seed_ids = program_db.get_seed()
        try:
            relative_error, elapsed_time, avg_rate = generate_and_debug(
                cfg,
                round_idx=i,
                num_trials=num_trials,
                pde_name=pde_name,
                working_folder=working_folder,
                seed_implementations=seed_ids,
                model_name=model_name
            )
            seed_score = get_seed_score(float(relative_error), float(avg_rate))
            with open(os.path.join(working_folder, f'implementation_{i}.py'), 'r') as f:
                program_len = len(f.readlines())
            program_db.register_program(
                program=i,
                program_len=program_len,
                island_id=island_id,
                scores_per_test=seed_score,
            )
        except Exception as e:
            print(f'Error in round {i}: {e}. Moving on to the next sample.')

    # Finally, report the best program
    results = pd.read_csv(os.path.join(working_folder, 'test_results.csv'))
    keywords = ['nRMSE', 'elapsed_time', 'convergence_rate']
    for keyword in keywords:
        results[keyword] = pd.to_numeric(results[keyword], errors='coerce')
    # Best program: lowest nRMSE first, ties broken by lowest elapsed time,
    # then highest convergence rate; failed runs (NaN) sort last.
    sorted_results = results.sort_values(by=keywords, ascending=[True, True, False])
    best_idx = int(sorted_results.head(1)['round'].values[0])
    
    test_run_id = 999  # Reserved round index for the final evaluation run
    shutil.copy(
        os.path.join(working_folder, f'implementation_{best_idx}.py'),
        os.path.join(working_folder, f'implementation_{test_run_id}.py')
    )
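    # Re-evaluate the winner on the full evaluation dataset (stripping the
    # '_development' suffix swaps the development split for the full file).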
    execution_results = code_execution(
        cfg,
        working_folder=working_folder,
        round_idx=test_run_id,
        pde_name=pde_name,
        eval_dataset=os.path.join(
            cfg.root_dataset_folder,
            cfg.pde.dataset_folder_for_eval.replace('_development.hdf5', '.hdf5')
        )
    )

    if execution_results['exit_code'] != 0:
        relative_error, elapsed_time, avg_rate = None, None, None
    else:
        relative_error, elapsed_time, avg_rate = get_results(
            os.path.join(working_folder, f'output_{test_run_id}.txt')
        )
    with open(os.path.join(working_folder, 'final_result.txt'), 'w') as f:
        f.write('best_idx,relative_error,elapsed_time,avg_rate\n')
        f.write(f'{best_idx},{relative_error},{elapsed_time},{avg_rate}\n')
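
# Minimal invocation sketch (hypothetical wiring; the function only assumes a
# config object exposing the fields read above, e.g. a Hydra/OmegaConf cfg):
#
#     from omegaconf import OmegaConf
#     cfg = OmegaConf.load('config.yaml')  # hypothetical config path
#     funsearch(cfg)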