| import os | |
| import hydra | |
| import librosa | |
| import utils | |
| from os.path import expanduser, exists, basename, join | |
| from utils import read_filelist, write_filelist, find_all_files | |
| from tqdm import tqdm | |
def preprocess(cfg):
    """Build the LibriSpeech test filelist and write it to disk.

    Every subset listed in ``cfg.preprocess.datasets.LibriSpeech.testsets``
    is searched (below ``cfg.preprocess.datasets.LibriSpeech.root_val``) for
    ``.flac`` files. Element 1 of each returned entry is rewritten in place
    so that the path is relative to the dataset root, and the combined
    entries are written to ``cfg.preprocess.view.test_filelist``.

    Args:
        cfg: Configuration object exposing the attributes named above.
    """
    os.makedirs('filelists', exist_ok=True)

    librispeech = cfg.preprocess.datasets.LibriSpeech
    out_path = cfg.preprocess.view.test_filelist

    root = expanduser(librispeech.root_val)
    print(f'Root: {root}')

    testfiles = []
    for subset in librispeech.testsets:
        files = find_all_files(join(root, subset), '.flac')
        print(f'Found {len(files)} flac files in {subset}')
        for entry in files:
            path = entry[1]
            # Strip the root only when it is the leading prefix; a blanket
            # str.replace would also delete the same text deeper in the path.
            if path.startswith(root):
                path = path[len(root):]
            entry[1] = path.lstrip('/')
        testfiles.extend(files)

    print(f'Write test filelist to {out_path}')
    # The configured output may live outside 'filelists', so make sure its
    # own parent directory exists before writing.
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    utils.write_filelist(testfiles, out_path)
if __name__ == "__main__":
    # Script entry point.
    # NOTE(review): preprocess is declared with a required `cfg` parameter but
    # is invoked here without one; presumably a @hydra.main decorator is meant
    # to supply it -- confirm against the config setup.
    preprocess()