Commit 49c0f5a · 1 Parent(s): 94b0409
Implement lazy loading for TTS models
Models now load on-demand when a voice is selected for generation, instead of pre-loading all models at startup. This reduces memory usage and improves startup time.
🤖 Generated with [Claude Code](https://claude.ai/code)
Co-Authored-By: Claude <noreply@anthropic.com>
app.py CHANGED

@@ -30,18 +30,21 @@ MODELS = {
     "Tighnari": "Vyvo/VyvoTTS-LFM2-Tighnari",
 }
 
-#
-print("Loading models...")
+# Store for lazy-loaded models
 models = {}
 tokenizers = {}
 
-
-
-
-
-
-
-
+def load_model_if_needed(model_choice):
+    """Load model and tokenizer on-demand when needed"""
+    if model_choice not in models:
+        model_name = MODELS[model_choice]
+        print(f"Loading {model_choice} model: {model_name}")
+        models[model_choice] = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16)
+        models[model_choice].to(device)
+        tokenizers[model_choice] = AutoTokenizer.from_pretrained(model_name)
+        print(f"{model_choice} model loaded successfully!")
+
+    return models[model_choice], tokenizers[model_choice]
 
 # LFM2 Special Tokens Configuration
 TOKENIZER_LENGTH = 64400

@@ -131,9 +134,8 @@ def generate_speech(text, model_choice, temperature, top_p, repetition_penalty,
         return None
 
     try:
-        progress(0.1, "🚀
-        model = models[model_choice]
-        tokenizer = tokenizers[model_choice]
+        progress(0.1, "🚀 Loading model and processing text...")
+        model, tokenizer = load_model_if_needed(model_choice)
 
         # Voice parameter is always None for LFM2 models
         input_ids, attention_mask = process_prompt(text, tokenizer, device)