PierrunoYT Claude committed on
Commit 49c0f5a · 1 Parent(s): 94b0409

Implement lazy loading for TTS models


Models now load on demand when a voice is selected for generation, instead of all being pre-loaded at startup. This reduces memory usage and improves startup time.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (1)
  1. app.py +14 -12
app.py CHANGED
@@ -30,18 +30,21 @@ MODELS = {
     "Tighnari": "Vyvo/VyvoTTS-LFM2-Tighnari",
 }
 
-# Pre-load all models
-print("Loading models...")
+# Store for lazy-loaded models
 models = {}
 tokenizers = {}
 
-for lang, model_name in MODELS.items():
-    print(f"Loading {lang} model: {model_name}")
-    models[lang] = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16)
-    models[lang].to(device)
-    tokenizers[lang] = AutoTokenizer.from_pretrained(model_name)
-
-print("All models loaded successfully!")
+def load_model_if_needed(model_choice):
+    """Load model and tokenizer on-demand when needed"""
+    if model_choice not in models:
+        model_name = MODELS[model_choice]
+        print(f"Loading {model_choice} model: {model_name}")
+        models[model_choice] = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16)
+        models[model_choice].to(device)
+        tokenizers[model_choice] = AutoTokenizer.from_pretrained(model_name)
+        print(f"{model_choice} model loaded successfully!")
+
+    return models[model_choice], tokenizers[model_choice]
 
 # LFM2 Special Tokens Configuration
 TOKENIZER_LENGTH = 64400
@@ -131,9 +134,8 @@ def generate_speech(text, model_choice, temperature, top_p, repetition_penalty,
         return None
 
     try:
-        progress(0.1, "🔄 Processing text...")
-        model = models[model_choice]
-        tokenizer = tokenizers[model_choice]
+        progress(0.1, "🔄 Loading model and processing text...")
+        model, tokenizer = load_model_if_needed(model_choice)
 
         # Voice parameter is always None for LFM2 models
         input_ids, attention_mask = process_prompt(text, tokenizer, device)
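
For reference, the lazy-loading pattern this diff introduces reduces to the sketch below. It mirrors the module-level names from app.py (MODELS, device, models, tokenizers); the threading.Lock is not part of the commit and is shown only as an optional guard in case two concurrent requests could trigger the same first load.

import threading

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODELS = {"Tighnari": "Vyvo/VyvoTTS-LFM2-Tighnari"}
device = "cuda" if torch.cuda.is_available() else "cpu"

models = {}       # model cache, keyed by voice name
tokenizers = {}   # tokenizer cache, keyed by voice name
_load_lock = threading.Lock()  # assumption: serialize first loads across requests

def load_model_if_needed(model_choice):
    """Load model and tokenizer on first use; afterwards serve from the cache."""
    with _load_lock:
        if model_choice not in models:
            model_name = MODELS[model_choice]
            model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16)
            models[model_choice] = model.to(device)
            tokenizers[model_choice] = AutoTokenizer.from_pretrained(model_name)
    return models[model_choice], tokenizers[model_choice]

# Usage, as in generate_speech:
# model, tokenizer = load_model_if_needed("Tighnari")

The trade-off is the one the commit message names: startup no longer pays for every model, at the cost of a one-time load delay the first time each voice is used.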