| tokenizers_info: | |
| - name: AA | |
| tokenizer_id: 0 | |
| json_path: ./t5_tokenizer_AA_special.json | |
| modular_json_path: ./t5_tokenizer_AA_special.json | |
| start_delimiter: <start_AA> | |
| end_delimiter: <end_AA> | |
| - name: SMILES | |
| tokenizer_id: 1 | |
| json_path: ./bpe_tokenizer_trained_on_chembl_zinc_with_aug_4272372_samples_balanced_1_1.json | |
| modular_json_path: ./bpe_tokenizer_trained_on_chembl_zinc_with_aug_4272372_samples_balanced_1_1.json | |
| start_delimiter: <start_SMILES> | |
| end_delimiter: <end_SMILES> | |
| - name: CELL_ATTRIBUTES | |
| tokenizer_id: 2 | |
| json_path: ./cell_attributes_tokenizer.json | |
| modular_json_path: ./cell_attributes_tokenizer.json | |
| start_delimiter: <start_CELL_ATTRIBUTES> | |
| end_delimiter: <end_CELL_ATTRIBUTES> | |
| - name: GENE | |
| tokenizer_id: 3 | |
| json_path: ./gene_tokenizer.json | |
| modular_json_path: ./gene_tokenizer.json | |
| start_delimiter: <start_GENE> | |
| end_delimiter: <end_GENE> | |
| minimal_token_id: 5000 | |
| max_possible_token_id: 100000 | |
| max_special_token_id: 500 | |