#
# SPDX-FileCopyrightText: Hadad
# SPDX-License-Identifier: Apache-2.0
#

import gradio as gr
from config import VOICE_MODE_CLONE
from ..core.state import (
    generation_state_lock,
    get_stop_generation_requested,
    set_stop_generation_requested
)
from ..core.authentication import get_huggingface_token
from ..core.memory import (
    has_temporary_files_pending_cleanup,
    cleanup_expired_temporary_files,
    perform_memory_cleanup,
    memory_cleanup,
    trigger_background_cleanup_check
)
from ..tts.manager import text_to_speech_manager
from ..validation.text import validate_text_input


def check_if_generating():
    """Return the shared ``is_currently_generating`` flag, read under the lock."""
    # Import the module rather than the name: ``from ... import name`` copies
    # the value at import time, i.e. before the lock is held. Reading the
    # attribute inside the ``with`` block makes the lock actually cover it.
    from ..core import state as global_state

    with generation_state_lock:
        return global_state.is_currently_generating


def request_generation_stop():
    """Flag the running generation to stop and return a Gradio update.

    Returns:
        ``gr.update(interactive=False)``, to be applied to whichever
        component the caller wires this handler to.
    """
    set_stop_generation_requested(True)
    return gr.update(interactive=False)


def _stop_was_requested():
    """Return the shared ``stop_generation_requested`` flag, read under the lock."""
    from ..core import state as global_state

    with generation_state_lock:
        return global_state.stop_generation_requested


def perform_speech_generation(
    text_input,
    voice_mode_selection,
    voice_preset_selection,
    voice_clone_audio_file,
    model_variant,
    lsd_decode_steps,
    temperature,
    noise_clamp,
    eos_threshold,
    frames_after_eos,
    enable_custom_frames
):
    """Validate the request, run text-to-speech, and return the saved audio.

    Args:
        text_input: Raw text, passed to ``validate_text_input``.
        voice_mode_selection: Compared against ``VOICE_MODE_CLONE`` to choose
            between the cloning path and the preset path.
        voice_preset_selection: Passed to ``get_voice_state_for_preset`` when
            the mode is not ``VOICE_MODE_CLONE``.
        voice_clone_audio_file: Passed to ``get_voice_state_for_clone`` when
            the mode is ``VOICE_MODE_CLONE``; must be truthy in that mode.
        model_variant, temperature, lsd_decode_steps, noise_clamp,
        eos_threshold: Forwarded to ``load_or_get_model``.
        frames_after_eos, enable_custom_frames: Forwarded to
            ``generate_audio``.

    Returns:
        The value returned by ``save_audio_to_file``, or ``None`` when a stop
        request is observed at one of the checkpoints between stages.

    Raises:
        gr.Error: If validation fails, clone mode has no audio file or no
            Hugging Face token, a generation is already in progress, or any
            exception is raised during generation.
    """
    from ..core import state as global_state

    if has_temporary_files_pending_cleanup():
        cleanup_expired_temporary_files()

    perform_memory_cleanup()

    # On success ``validation_result`` is the text handed to the model; on
    # failure it is the error message (possibly empty).
    is_valid, validation_result = validate_text_input(text_input)

    if not is_valid:
        if validation_result:
            raise gr.Error(validation_result)
        raise gr.Error("Please enter valid text to generate speech.")

    if voice_mode_selection == VOICE_MODE_CLONE:
        if not voice_clone_audio_file:
            raise gr.Error("Please upload an audio file for voice cloning.")

        if not get_huggingface_token():
            raise gr.Error("Voice cloning is not configured properly at the moment. Please try again later.")

    # Check-and-set under a single lock acquisition so two concurrent requests
    # cannot both pass the "already generating" test.
    with generation_state_lock:
        if global_state.is_currently_generating:
            raise gr.Error("A generation is already in progress. Please wait.")
        global_state.is_currently_generating = True
        global_state.stop_generation_requested = False

    generated_audio_tensor = None
    cloned_voice_state_tensor = None

    try:
        text_to_speech_manager.load_or_get_model(
            model_variant,
            temperature,
            lsd_decode_steps,
            noise_clamp,
            eos_threshold
        )

        if _stop_was_requested():
            return None

        if voice_mode_selection == VOICE_MODE_CLONE:
            cloned_voice_state_tensor = text_to_speech_manager.get_voice_state_for_clone(
                voice_clone_audio_file
            )
            voice_state = cloned_voice_state_tensor
        else:
            voice_state = text_to_speech_manager.get_voice_state_for_preset(
                voice_preset_selection
            )

        if _stop_was_requested():
            return None

        generated_audio_tensor = text_to_speech_manager.generate_audio(
            validation_result,
            voice_state,
            frames_after_eos,
            enable_custom_frames
        )

        if _stop_was_requested():
            return None

        return text_to_speech_manager.save_audio_to_file(generated_audio_tensor)

    except gr.Error:
        # Already user-facing; propagate unchanged.
        raise

    except RuntimeError as runtime_error:
        raise gr.Error(str(runtime_error)) from runtime_error

    except Exception as generation_error:
        raise gr.Error(f"Speech generation failed: {generation_error}") from generation_error

    finally:
        # Runs on every exit path (return, stop, error) so the busy and stop
        # flags are always cleared once they have been set above.
        with generation_state_lock:
            global_state.is_currently_generating = False
            global_state.stop_generation_requested = False

        # Drop local references before the cleanup calls below.
        generated_audio_tensor = None
        cloned_voice_state_tensor = None

        memory_cleanup()
        trigger_background_cleanup_check()