PierrunoYT Claude committed on
Commit 49c0f5a · 1 Parent(s): 94b0409

Implement lazy loading for TTS models


Models now load on demand when a voice is selected for generation, instead of all being pre-loaded at startup. This reduces memory usage and improves startup time.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (1)
  1. app.py +14 -12
app.py CHANGED
@@ -30,18 +30,21 @@ MODELS = {
     "Tighnari": "Vyvo/VyvoTTS-LFM2-Tighnari",
 }
 
-# Pre-load all models
-print("Loading models...")
+# Store for lazy-loaded models
 models = {}
 tokenizers = {}
 
-for lang, model_name in MODELS.items():
-    print(f"Loading {lang} model: {model_name}")
-    models[lang] = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16)
-    models[lang].to(device)
-    tokenizers[lang] = AutoTokenizer.from_pretrained(model_name)
-
-print("All models loaded successfully!")
+def load_model_if_needed(model_choice):
+    """Load model and tokenizer on-demand when needed"""
+    if model_choice not in models:
+        model_name = MODELS[model_choice]
+        print(f"Loading {model_choice} model: {model_name}")
+        models[model_choice] = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16)
+        models[model_choice].to(device)
+        tokenizers[model_choice] = AutoTokenizer.from_pretrained(model_name)
+        print(f"{model_choice} model loaded successfully!")
+
+    return models[model_choice], tokenizers[model_choice]
 
 # LFM2 Special Tokens Configuration
 TOKENIZER_LENGTH = 64400
@@ -131,9 +134,8 @@ def generate_speech(text, model_choice, temperature, top_p, repetition_penalty,
         return None
 
     try:
-        progress(0.1, "🔄 Processing text...")
-        model = models[model_choice]
-        tokenizer = tokenizers[model_choice]
+        progress(0.1, "🔄 Loading model and processing text...")
+        model, tokenizer = load_model_if_needed(model_choice)
 
         # Voice parameter is always None for LFM2 models
         input_ids, attention_mask = process_prompt(text, tokenizer, device)
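
For reference, the lazy-loading pattern this diff introduces reduces to the sketch below. It mirrors the module-level names from app.py (MODELS, device, models, tokenizers); the threading.Lock is not part of the commit and is shown only as an optional guard in case two concurrent requests could trigger the same first load.

import threading

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODELS = {"Tighnari": "Vyvo/VyvoTTS-LFM2-Tighnari"}
device = "cuda" if torch.cuda.is_available() else "cpu"

models = {}       # model cache, keyed by voice name
tokenizers = {}   # tokenizer cache, keyed by voice name
_load_lock = threading.Lock()  # assumption: serialize first loads across requests

def load_model_if_needed(model_choice):
    """Load model and tokenizer on first use; afterwards serve from the cache."""
    with _load_lock:
        if model_choice not in models:
            model_name = MODELS[model_choice]
            model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16)
            models[model_choice] = model.to(device)
            tokenizers[model_choice] = AutoTokenizer.from_pretrained(model_name)
    return models[model_choice], tokenizers[model_choice]

# Usage, as in generate_speech:
# model, tokenizer = load_model_if_needed("Tighnari")

The trade-off is the one the commit message names: startup no longer pays for every model, at the cost of a one-time load delay the first time each voice is used.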