hadadrjt commited on
Commit
5da0109
·
1 Parent(s): d24817e

Pocket TTS: Let's take this seriously.

Browse files
.dockerignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ Dockerfile
2
+ LICENSE
3
+ README.md
Dockerfile CHANGED
@@ -7,4 +7,4 @@ FROM hadadrjt/pocket-tts:hf
7
 
8
  WORKDIR /app
9
 
10
- COPY app.py .
 
7
 
8
  WORKDIR /app
9
 
10
+ COPY . .
app.py CHANGED
@@ -1,1565 +1,227 @@
1
- """
2
- ============================================================================
3
- AI-GENERATED CODE
4
- ============================================================================
5
- """
6
-
7
- """
8
- Pocket TTS Web Application
9
- ==========================
10
-
11
- A Gradio-based web interface for the Pocket TTS text-to-speech model.
12
- This application provides an intuitive interface for generating speech
13
- from text using either preset voices or voice cloning capabilities.
14
-
15
- Features:
16
- ---------
17
- - Multiple preset voice options
18
- - Voice cloning from uploaded audio files
19
- - Configurable generation parameters (temperature, LSD steps, etc.)
20
- - Real-time character counting and validation
21
- - Temporary file management with automatic cleanup
22
- - Thread-safe generation state management
23
-
24
- Usage:
25
- ------
26
- Run this script directly to launch the web application:
27
- $ python app.py
28
-
29
- The application will be available at http://localhost:7860
30
- """
31
-
32
- import os
33
- import time
34
- import torch
35
- import tempfile
36
- import threading
37
- import scipy.io.wavfile
38
- import gradio as gr
39
- from pocket_tts import TTSModel
40
-
41
-
42
- # =============================================================================
43
- # =============================================================================
44
-
45
  #
46
  # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
47
  # SPDX-License-Identifier: Apache-2.0
48
  #
49
 
50
- from huggingface_hub import login
51
-
52
- HF_TOKEN = os.getenv("HF_TOKEN", None)
53
-
54
- if HF_TOKEN:
55
- try:
56
- login(token=HF_TOKEN, add_to_git_credential=False)
57
- print("Authenticated with Hugging Face")
58
-
59
- except Exception as auth_error:
60
- print(f"Hugging Face authentication failed: {auth_error}")
61
- print("Voice cloning may not be available")
62
-
63
- else:
64
- print("Missing Hugging Face authentication required for the license agreement")
65
-
66
- # =============================================================================
67
- # =============================================================================
68
-
69
-
70
- # =============================================================================
71
- # ENVIRONMENT CONFIGURATION
72
- # =============================================================================
73
- # Configure PyTorch threading behavior
74
- torch.set_num_threads(1) # Intra-op parallelism threads
75
- torch.set_num_interop_threads(1) # Inter-op parallelism threads
76
-
77
-
78
- # =============================================================================
79
- # APPLICATION CONSTANTS
80
- # =============================================================================
81
- # Define all configurable constants and default values used throughout
82
- # the application. These values control model behavior, UI constraints,
83
- # and resource management policies.
84
-
85
- # Available preset voice options for speech generation
86
- AVAILABLE_VOICES = [
87
- "alba",
88
- "marius",
89
- "javert",
90
- "jean",
91
- "fantine",
92
- "cosette",
93
- "eponine",
94
- "azelma"
95
- ]
96
-
97
- # Default configuration values
98
- DEFAULT_VOICE = "alba" # Default preset voice selection
99
- DEFAULT_MODEL_VARIANT = "b6369a24" # Model variant identifier
100
- DEFAULT_TEMPERATURE = 0.7 # Generation temperature
101
- DEFAULT_LSD_DECODE_STEPS = 1 # Latent space decode steps
102
- DEFAULT_EOS_THRESHOLD = -4.0 # End-of-sequence detection threshold
103
- DEFAULT_NOISE_CLAMP = 0.0 # Noise clamping value (0 = disabled)
104
- DEFAULT_FRAMES_AFTER_EOS = 10 # Additional frames after EOS
105
-
106
- # Input constraints and resource management
107
- MAXIMUM_INPUT_LENGTH = 1000 # Maximum text input characters
108
- TEMPORARY_FILE_LIFETIME_SECONDS = 7200 # Temp file retention (2 hours)
109
-
110
- # Voice mode selection options
111
- VOICE_MODE_PRESET = "Preset Voices" # Use predefined voice
112
- VOICE_MODE_CLONE = "Voice Cloning" # Clone voice from audio
113
-
114
- # Example prompts with associated voice presets for demonstration
115
- EXAMPLE_PROMPTS_WITH_VOICES = [
116
- {
117
- "text": "The quick brown fox jumps over the lazy dog near the riverbank.",
118
- "voice": "alba"
119
- },
120
- {
121
- "text": "Welcome to the future of text to speech technology powered by artificial intelligence.",
122
- "voice": "marius"
123
- },
124
- {
125
- "text": "Technology continues to push the boundaries of what we thought was possible.",
126
- "voice": "javert"
127
- },
128
- {
129
- "text": "The weather today is absolutely beautiful and perfect for a relaxing walk outside.",
130
- "voice": "fantine"
131
- },
132
- {
133
- "text": "Science and innovation are transforming how we interact with the world around us.",
134
- "voice": "jean"
135
- }
136
- ]
137
-
138
-
139
- # =============================================================================
140
- # THREAD SYNCHRONIZATION
141
- # =============================================================================
142
- # Global state management for thread-safe generation operations.
143
- # These locks and flags prevent concurrent generation requests and
144
- # enable graceful cancellation of ongoing operations.
145
-
146
- generation_state_lock = threading.Lock() # Lock for generation state access
147
- is_currently_generating = False # Flag indicating active generation
148
- stop_generation_requested = False # Flag for stop request signaling
149
-
150
- # Temporary file registry for cleanup management
151
- temporary_files_registry = {} # Maps file paths to creation timestamps
152
- temporary_files_lock = threading.Lock() # Lock for registry access
153
-
154
-
155
- # =============================================================================
156
- # =============================================================================
157
-
158
- #
159
- # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
160
- # SPDX-License-Identifier: Apache-2.0
161
- #
162
-
163
- import gc
164
- import atexit
165
-
166
- BACKGROUND_CLEANUP_INTERVAL = 300
167
- VOICE_STATE_CACHE_MAXIMUM_SIZE = 8
168
- VOICE_STATE_CACHE_CLEANUP_THRESHOLD = 4
169
-
170
- MAXIMUM_MEMORY_USAGE = 1 * 1024 * 1024 * 1024
171
-
172
- MEMORY_WARNING_THRESHOLD = int(0.7 * MAXIMUM_MEMORY_USAGE)
173
- MEMORY_CRITICAL_THRESHOLD = int(0.85 * MAXIMUM_MEMORY_USAGE)
174
- MEMORY_CHECK_INTERVAL = 30
175
- MEMORY_IDLE_TARGET = int(0.5 * MAXIMUM_MEMORY_USAGE)
176
-
177
- background_cleanup_thread = None
178
- background_cleanup_stop_event = threading.Event()
179
- background_cleanup_trigger_event = threading.Event()
180
-
181
- memory_enforcement_lock = threading.Lock()
182
-
183
- text_to_speech_manager = None
184
-
185
- def get_current_memory_usage():
186
- try:
187
- with open('/proc/self/status', 'r') as status_file:
188
- for line in status_file:
189
-
190
- if line.startswith('VmRSS:'):
191
- memory_value_kb = int(line.split()[1])
192
- return memory_value_kb * 1024
193
-
194
- except Exception:
195
- pass
196
-
197
- try:
198
- with open('/proc/self/statm', 'r') as statm_file:
199
- statm_values = statm_file.read().split()
200
- resident_pages = int(statm_values[1])
201
- page_size = os.sysconf('SC_PAGE_SIZE')
202
- return resident_pages * page_size
203
-
204
- except Exception:
205
- pass
206
-
207
- try:
208
- import resource
209
- memory_usage_kilobytes = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss
210
-
211
- import platform
212
- if platform.system() == "Darwin":
213
- return memory_usage_kilobytes
214
-
215
- else:
216
- return memory_usage_kilobytes * 1024
217
-
218
- except Exception:
219
- pass
220
-
221
- return 0
222
-
223
- def check_if_generation_is_currently_active():
224
- with generation_state_lock:
225
- return is_currently_generating
226
-
227
- def is_memory_usage_within_limit():
228
- current_memory_usage = get_current_memory_usage()
229
- return current_memory_usage < MAXIMUM_MEMORY_USAGE
230
-
231
- def is_memory_usage_approaching_limit():
232
- current_memory_usage = get_current_memory_usage()
233
- return current_memory_usage >= MEMORY_WARNING_THRESHOLD
234
-
235
- def is_memory_usage_critical():
236
- current_memory_usage = get_current_memory_usage()
237
- return current_memory_usage >= MEMORY_CRITICAL_THRESHOLD
238
-
239
- def is_memory_above_idle_target():
240
- current_memory_usage = get_current_memory_usage()
241
- return current_memory_usage > MEMORY_IDLE_TARGET
242
-
243
- def force_garbage_collection():
244
- gc.collect(0)
245
- gc.collect(1)
246
- gc.collect(2)
247
-
248
- if torch.cuda.is_available():
249
- torch.cuda.empty_cache()
250
- torch.cuda.synchronize()
251
-
252
- def memory_cleanup():
253
- force_garbage_collection()
254
-
255
- try:
256
- import ctypes
257
-
258
- libc = ctypes.CDLL("libc.so.6")
259
- libc.malloc_trim(0)
260
-
261
- except Exception:
262
- pass
263
-
264
- force_garbage_collection()
265
-
266
- def perform_memory_cleanup():
267
- global text_to_speech_manager
268
-
269
- force_garbage_collection()
270
-
271
- if text_to_speech_manager is not None:
272
- text_to_speech_manager.evict_least_recently_used_voice_states()
273
-
274
- memory_cleanup()
275
-
276
- def enforce_memory_limit_if_exceeded():
277
- global text_to_speech_manager
278
-
279
- with memory_enforcement_lock:
280
- generation_is_active = check_if_generation_is_currently_active()
281
-
282
- current_memory_usage = get_current_memory_usage()
283
-
284
- if current_memory_usage < MEMORY_WARNING_THRESHOLD:
285
- return True
286
-
287
- force_garbage_collection()
288
- current_memory_usage = get_current_memory_usage()
289
-
290
- if current_memory_usage < MEMORY_WARNING_THRESHOLD:
291
- return True
292
-
293
- if text_to_speech_manager is not None:
294
- text_to_speech_manager.evict_least_recently_used_voice_states()
295
-
296
- memory_cleanup()
297
- current_memory_usage = get_current_memory_usage()
298
-
299
- if current_memory_usage < MEMORY_CRITICAL_THRESHOLD:
300
- return True
301
-
302
- if text_to_speech_manager is not None:
303
- text_to_speech_manager.clear_voice_state_cache_completely()
304
-
305
- cleanup_all_temporary_files_immediately()
306
- memory_cleanup()
307
- current_memory_usage = get_current_memory_usage()
308
-
309
- if current_memory_usage < MAXIMUM_MEMORY_USAGE:
310
- return True
311
-
312
- if generation_is_active:
313
- return current_memory_usage < MAXIMUM_MEMORY_USAGE
314
-
315
- if text_to_speech_manager is not None:
316
- text_to_speech_manager.unload_model_completely()
317
-
318
- memory_cleanup()
319
- current_memory_usage = get_current_memory_usage()
320
-
321
- return current_memory_usage < MAXIMUM_MEMORY_USAGE
322
-
323
- def perform_idle_memory_reduction():
324
- global text_to_speech_manager
325
-
326
- if check_if_generation_is_currently_active():
327
- return
328
-
329
- with memory_enforcement_lock:
330
- current_memory_usage = get_current_memory_usage()
331
-
332
- if current_memory_usage <= MEMORY_IDLE_TARGET:
333
- return
334
-
335
- force_garbage_collection()
336
- current_memory_usage = get_current_memory_usage()
337
-
338
- if current_memory_usage <= MEMORY_IDLE_TARGET:
339
- return
340
-
341
- if check_if_generation_is_currently_active():
342
- return
343
-
344
- if text_to_speech_manager is not None:
345
- text_to_speech_manager.evict_least_recently_used_voice_states()
346
-
347
- memory_cleanup()
348
- current_memory_usage = get_current_memory_usage()
349
-
350
- if current_memory_usage <= MEMORY_IDLE_TARGET:
351
- return
352
-
353
- if check_if_generation_is_currently_active():
354
- return
355
-
356
- if text_to_speech_manager is not None:
357
- text_to_speech_manager.clear_voice_state_cache_completely()
358
-
359
- memory_cleanup()
360
- current_memory_usage = get_current_memory_usage()
361
-
362
- if current_memory_usage <= MEMORY_IDLE_TARGET:
363
- return
364
-
365
- if check_if_generation_is_currently_active():
366
- return
367
-
368
- if text_to_speech_manager is not None:
369
- text_to_speech_manager.unload_model_completely()
370
-
371
- memory_cleanup()
372
-
373
- def cleanup_all_temporary_files_immediately():
374
- with temporary_files_lock:
375
- for file_path in list(temporary_files_registry.keys()):
376
- try:
377
- if os.path.exists(file_path):
378
- os.remove(file_path)
379
- del temporary_files_registry[file_path]
380
-
381
- except Exception:
382
- pass
383
-
384
- def has_temporary_files_pending_cleanup():
385
- with temporary_files_lock:
386
-
387
- if len(temporary_files_registry) == 0:
388
- return False
389
-
390
- current_timestamp = time.time()
391
-
392
- for file_path, creation_timestamp in temporary_files_registry.items():
393
- if current_timestamp - creation_timestamp > TEMPORARY_FILE_LIFETIME_SECONDS:
394
- return True
395
-
396
- return False
397
 
398
- def has_any_temporary_files_registered():
399
- with temporary_files_lock:
400
- return len(temporary_files_registry) > 0
401
 
402
- def calculate_time_until_next_file_expiration():
403
- with temporary_files_lock:
404
- if len(temporary_files_registry) == 0:
405
- return None
406
-
407
- current_timestamp = time.time()
408
- minimum_time_until_expiration = None
409
-
410
- for file_path, creation_timestamp in temporary_files_registry.items():
411
- time_since_creation = current_timestamp - creation_timestamp
412
- time_until_expiration = TEMPORARY_FILE_LIFETIME_SECONDS - time_since_creation
413
-
414
- if time_until_expiration <= 0:
415
- return 0
416
-
417
- if minimum_time_until_expiration is None or time_until_expiration < minimum_time_until_expiration:
418
- minimum_time_until_expiration = time_until_expiration
419
-
420
- return minimum_time_until_expiration
421
 
422
- def perform_background_cleanup_cycle():
423
- last_memory_check_timestamp = 0
424
-
425
- while not background_cleanup_stop_event.is_set():
426
- time_until_next_expiration = calculate_time_until_next_file_expiration()
427
- current_timestamp = time.time()
428
- time_since_last_memory_check = current_timestamp - last_memory_check_timestamp
429
-
430
- if time_until_next_expiration is not None:
431
- if time_until_next_expiration <= 0:
432
- wait_duration = 1
433
 
434
- else:
435
- wait_duration = min(
436
- time_until_next_expiration + 1,
437
- MEMORY_CHECK_INTERVAL,
438
- BACKGROUND_CLEANUP_INTERVAL
 
 
 
 
 
439
  )
440
- else:
441
- if is_memory_above_idle_target() and not check_if_generation_is_currently_active():
442
- wait_duration = MEMORY_CHECK_INTERVAL
443
-
444
- else:
445
- background_cleanup_trigger_event.clear()
446
- triggered = background_cleanup_trigger_event.wait(timeout=BACKGROUND_CLEANUP_INTERVAL)
447
-
448
- if background_cleanup_stop_event.is_set():
449
- break
450
-
451
- if triggered:
452
- continue
453
-
454
- else:
455
- if not check_if_generation_is_currently_active():
456
- perform_idle_memory_reduction()
457
 
458
- continue
459
-
460
- background_cleanup_stop_event.wait(timeout=wait_duration)
461
-
462
- if background_cleanup_stop_event.is_set():
463
- break
464
-
465
- if has_temporary_files_pending_cleanup():
466
- cleanup_expired_temporary_files()
467
-
468
- current_timestamp = time.time()
469
- time_since_last_memory_check = current_timestamp - last_memory_check_timestamp
470
-
471
- if time_since_last_memory_check >= MEMORY_CHECK_INTERVAL:
472
- if not check_if_generation_is_currently_active():
473
-
474
- if is_memory_usage_critical():
475
- enforce_memory_limit_if_exceeded()
476
-
477
- elif is_memory_above_idle_target():
478
- perform_idle_memory_reduction()
479
-
480
- last_memory_check_timestamp = current_timestamp
481
-
482
- def trigger_background_cleanup_check():
483
- background_cleanup_trigger_event.set()
484
-
485
- def start_background_cleanup_thread():
486
- global background_cleanup_thread
487
-
488
- if background_cleanup_thread is None or not background_cleanup_thread.is_alive():
489
- background_cleanup_stop_event.clear()
490
- background_cleanup_trigger_event.clear()
491
-
492
- background_cleanup_thread = threading.Thread(
493
- target=perform_background_cleanup_cycle,
494
- daemon=True,
495
- name="BackgroundCleanupThread"
496
- )
497
-
498
- background_cleanup_thread.start()
499
-
500
- def stop_background_cleanup_thread():
501
- background_cleanup_stop_event.set()
502
- background_cleanup_trigger_event.set()
503
-
504
- if background_cleanup_thread is not None and background_cleanup_thread.is_alive():
505
- background_cleanup_thread.join(timeout=5)
506
-
507
- atexit.register(stop_background_cleanup_thread)
508
-
509
- # =============================================================================
510
- # =============================================================================
511
-
512
-
513
- # =============================================================================
514
- # =============================================================================
515
-
516
- #
517
- # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
518
- # SPDX-License-Identifier: Apache-2.0
519
- #
520
-
521
- import numpy as np
522
-
523
- def convert_audio_to_pcm_wav(input_path):
524
- try:
525
- sample_rate, audio_data = scipy.io.wavfile.read(input_path)
526
-
527
- if audio_data.dtype == np.float32 or audio_data.dtype == np.float64:
528
- audio_data = np.clip(audio_data, -1.0, 1.0)
529
- audio_data = (audio_data * 32767).astype(np.int16)
530
-
531
- elif audio_data.dtype == np.int32:
532
- audio_data = (audio_data >> 16).astype(np.int16)
533
-
534
- elif audio_data.dtype == np.uint8:
535
- audio_data = ((audio_data.astype(np.int16) - 128) * 256).astype(np.int16)
536
-
537
- elif audio_data.dtype != np.int16:
538
- audio_data = audio_data.astype(np.int16)
539
-
540
- output_file = tempfile.NamedTemporaryFile(suffix="_converted.wav", delete=False)
541
- scipy.io.wavfile.write(output_file.name, sample_rate, audio_data)
542
-
543
- with temporary_files_lock:
544
- temporary_files_registry[output_file.name] = time.time()
545
-
546
- trigger_background_cleanup_check()
547
-
548
- return output_file.name
549
-
550
- except Exception as conversion_error:
551
- print(f"Warning: {conversion_error}")
552
- return input_path
553
-
554
- # =============================================================================
555
- # =============================================================================
556
-
557
-
558
- # =============================================================================
559
- # TEXT-TO-SPEECH MANAGER CLASS
560
- # =============================================================================
561
-
562
- class TextToSpeechManager:
563
- """
564
- Manages TTS model lifecycle and speech generation operations.
565
-
566
- This class handles model loading, configuration caching, voice state
567
- management, and audio generation. It implements lazy loading and
568
- caching strategies to optimize performance and memory usage.
569
-
570
- Attributes:
571
- loaded_model: Currently loaded TTS model instance
572
- current_configuration: Dict of current model configuration
573
- voice_state_cache: Cache of computed voice states for preset voices
574
-
575
- Example:
576
- >>> manager = TextToSpeechManager()
577
- >>> manager.load_or_get_model("b6369a24", 0.7, 1, None, -4.0)
578
- >>> voice_state = manager.get_voice_state_for_preset("alba")
579
- >>> audio = manager.generate_audio("Hello world", voice_state, 10, False)
580
- """
581
-
582
- def __init__(self):
583
- """Initialize the TTS manager with empty state."""
584
- self.loaded_model = None
585
- self.current_configuration = {}
586
- self.voice_state_cache = {}
587
-
588
- self.voice_state_cache_access_timestamps = {}
589
- self.voice_state_cache_lock = threading.Lock()
590
- self.model_lock = threading.Lock()
591
-
592
- def is_model_loaded(self):
593
- with self.model_lock:
594
- return self.loaded_model is not None
595
-
596
- def unload_model_completely(self):
597
- with self.model_lock:
598
- self.clear_voice_state_cache_completely()
599
-
600
- if self.loaded_model is not None:
601
- del self.loaded_model
602
- self.loaded_model = None
603
-
604
- self.current_configuration = {}
605
-
606
- memory_cleanup()
607
-
608
- def load_or_get_model(
609
- self,
610
- model_variant,
611
- temperature,
612
- lsd_decode_steps,
613
- noise_clamp,
614
- eos_threshold
615
- ):
616
- """
617
- Load a TTS model or return cached instance if configuration matches.
618
-
619
- This method implements lazy loading with configuration-based caching.
620
- If the requested configuration differs from the currently loaded model,
621
- a new model instance is created and the voice state cache is cleared.
622
-
623
- Args:
624
- model_variant: Model variant identifier string
625
- temperature: Generation temperature (float, 0.1-2.0)
626
- lsd_decode_steps: Number of LSD decode steps (int, 1-20)
627
- noise_clamp: Maximum noise value or None to disable
628
- eos_threshold: End-of-sequence detection threshold (float)
629
-
630
- Returns:
631
- TTSModel: Loaded and configured TTS model instance
632
- """
633
- perform_memory_cleanup()
634
-
635
- # Process and validate input parameters with defaults
636
- processed_variant = str(model_variant or DEFAULT_MODEL_VARIANT).strip()
637
- processed_temperature = float(temperature) if temperature is not None else DEFAULT_TEMPERATURE
638
- processed_lsd_steps = int(lsd_decode_steps) if lsd_decode_steps is not None else DEFAULT_LSD_DECODE_STEPS
639
- processed_noise_clamp = float(noise_clamp) if noise_clamp and float(noise_clamp) > 0 else None
640
- processed_eos_threshold = float(eos_threshold) if eos_threshold is not None else DEFAULT_EOS_THRESHOLD
641
-
642
- # Build configuration dictionary for comparison
643
- requested_configuration = {
644
- "variant": processed_variant,
645
- "temp": processed_temperature,
646
- "lsd_decode_steps": processed_lsd_steps,
647
- "noise_clamp": processed_noise_clamp,
648
- "eos_threshold": processed_eos_threshold
649
- }
650
-
651
- with self.model_lock:
652
- # Load new model if configuration changed or no model loaded
653
- if self.loaded_model is None or self.current_configuration != requested_configuration:
654
- if self.loaded_model is not None:
655
- self.clear_voice_state_cache_completely()
656
- del self.loaded_model
657
- self.loaded_model = None
658
- memory_cleanup()
659
-
660
- self.loaded_model = TTSModel.load_model(**requested_configuration)
661
- self.current_configuration = requested_configuration
662
- self.voice_state_cache = {} # Clear cache on model change
663
-
664
- return self.loaded_model
665
-
666
- def clear_voice_state_cache_completely(self):
667
- with self.voice_state_cache_lock:
668
-
669
- for voice_name in list(self.voice_state_cache.keys()):
670
- voice_state_tensor = self.voice_state_cache.pop(voice_name, None)
671
 
672
- if voice_state_tensor is not None:
673
- del voice_state_tensor
 
 
 
674
 
675
- self.voice_state_cache.clear()
676
- self.voice_state_cache_access_timestamps.clear()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
677
 
678
- force_garbage_collection()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
679
 
680
- def evict_least_recently_used_voice_states(self):
681
- with self.voice_state_cache_lock:
 
 
 
 
682
 
683
- if len(self.voice_state_cache) <= VOICE_STATE_CACHE_CLEANUP_THRESHOLD:
684
- if len(self.voice_state_cache) > 0:
685
- sorted_voice_names_by_access_time = sorted(
686
- self.voice_state_cache_access_timestamps.keys(),
687
- key=lambda voice_name: self.voice_state_cache_access_timestamps[voice_name]
688
  )
689
 
690
- number_of_entries_to_remove = max(1, len(self.voice_state_cache) // 2)
691
-
692
- for index in range(min(number_of_entries_to_remove, len(sorted_voice_names_by_access_time))):
693
- voice_name_to_remove = sorted_voice_names_by_access_time[index]
694
- voice_state_tensor = self.voice_state_cache.pop(voice_name_to_remove, None)
695
- self.voice_state_cache_access_timestamps.pop(voice_name_to_remove, None)
696
-
697
- if voice_state_tensor is not None:
698
- del voice_state_tensor
699
-
700
- force_garbage_collection()
701
- return
702
 
703
- sorted_voice_names_by_access_time = sorted(
704
- self.voice_state_cache_access_timestamps.keys(),
705
- key=lambda voice_name: self.voice_state_cache_access_timestamps[voice_name]
 
 
 
 
 
706
  )
707
 
708
- number_of_entries_to_remove = len(self.voice_state_cache) - VOICE_STATE_CACHE_CLEANUP_THRESHOLD
709
-
710
- for index in range(number_of_entries_to_remove):
711
- voice_name_to_remove = sorted_voice_names_by_access_time[index]
712
- voice_state_tensor = self.voice_state_cache.pop(voice_name_to_remove, None)
713
- self.voice_state_cache_access_timestamps.pop(voice_name_to_remove, None)
714
-
715
- if voice_state_tensor is not None:
716
- del voice_state_tensor
717
-
718
- force_garbage_collection()
719
-
720
- def get_voice_state_for_preset(self, voice_name):
721
- """
722
- Get or compute voice state for a preset voice.
723
-
724
- Voice states are cached to avoid redundant computation for
725
- frequently used preset voices.
726
-
727
- Args:
728
- voice_name: Name of the preset voice (must be in AVAILABLE_VOICES)
729
-
730
- Returns:
731
- Voice state tensor for the specified preset voice
732
- """
733
- # Validate voice name and fall back to default if invalid
734
- validated_voice = voice_name if voice_name in AVAILABLE_VOICES else DEFAULT_VOICE
735
-
736
- with self.voice_state_cache_lock:
737
- if validated_voice in self.voice_state_cache:
738
- self.voice_state_cache_access_timestamps[validated_voice] = time.time()
739
- return self.voice_state_cache[validated_voice]
740
-
741
- if is_memory_usage_approaching_limit():
742
- self.evict_least_recently_used_voice_states()
743
-
744
- if len(self.voice_state_cache) >= VOICE_STATE_CACHE_MAXIMUM_SIZE:
745
- self.evict_least_recently_used_voice_states()
746
-
747
- with self.model_lock:
748
- if self.loaded_model is None:
749
- raise RuntimeError("TTS model is not loaded. Please try again.")
750
-
751
- # Compute and cache voice state if not already cached
752
- if validated_voice not in self.voice_state_cache:
753
-
754
- computed_voice_state = self.loaded_model.get_state_for_audio_prompt(
755
- audio_conditioning=validated_voice,
756
- truncate=False
757
  )
758
-
759
- with self.voice_state_cache_lock:
760
- self.voice_state_cache[validated_voice] = computed_voice_state
761
- self.voice_state_cache_access_timestamps[validated_voice] = time.time()
762
-
763
- return self.voice_state_cache[validated_voice]
764
-
765
- def get_voice_state_for_clone(self, audio_file_path):
766
- """
767
- Compute voice state from an uploaded audio file for voice cloning.
768
-
769
- Unlike preset voices, cloned voice states are not cached as they
770
- are typically unique per request. The audio file is first converted
771
- to PCM WAV format to ensure compatibility with the model.
772
-
773
- Args:
774
- audio_file_path: Path to the uploaded audio file
775
-
776
- Returns:
777
- Voice state tensor extracted from the audio file
778
- """
779
- with self.model_lock:
780
- if self.loaded_model is None:
781
- raise RuntimeError("TTS model is not loaded. Please try again.")
782
-
783
- converted_audio_path = convert_audio_to_pcm_wav(audio_file_path)
784
-
785
- return self.loaded_model.get_state_for_audio_prompt(
786
- audio_conditioning=converted_audio_path,
787
- truncate=False
788
- )
789
 
790
- def generate_audio(self, text_content, voice_state, frames_after_eos, enable_custom_frames):
791
- """
792
- Generate speech audio from text using the specified voice state.
793
-
794
- Args:
795
- text_content: Text string to convert to speech
796
- voice_state: Pre-computed voice state tensor
797
- frames_after_eos: Number of frames to generate after EOS
798
- enable_custom_frames: Whether to use custom frame count
799
-
800
- Returns:
801
- torch.Tensor: Generated audio waveform
802
- """
803
- with self.model_lock:
804
- if self.loaded_model is None:
805
- raise RuntimeError("TTS model is not loaded. Please try again.")
806
-
807
- # Apply custom frames setting if enabled
808
- processed_frames = int(frames_after_eos) if enable_custom_frames else None
809
-
810
- generated_audio = self.loaded_model.generate_audio(
811
- model_state=voice_state,
812
- text_to_generate=text_content,
813
- frames_after_eos=processed_frames,
814
- copy_state=True
815
  )
816
-
817
- force_garbage_collection()
818
-
819
- return generated_audio
820
-
821
- def save_audio_to_file(self, audio_tensor):
822
- """
823
- Save generated audio tensor to a temporary WAV file.
824
-
825
- The file is registered for automatic cleanup after the configured
826
- lifetime expires.
827
-
828
- Args:
829
- audio_tensor: PyTorch tensor containing audio waveform
830
-
831
- Returns:
832
- str: Path to the saved temporary WAV file
833
- """
834
- with self.model_lock:
835
- if self.loaded_model is None:
836
- raise RuntimeError("TTS model is not loaded. Cannot save audio.")
837
-
838
- audio_sample_rate = self.loaded_model.sample_rate
839
-
840
- # Convert tensor to numpy array for scipy
841
- audio_numpy_data = audio_tensor.numpy()
842
-
843
- # Create temporary file and write audio data
844
- output_file = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
845
- scipy.io.wavfile.write(output_file.name, audio_sample_rate, audio_numpy_data)
846
-
847
- # Register file for cleanup tracking
848
- with temporary_files_lock:
849
- temporary_files_registry[output_file.name] = time.time()
850
-
851
- trigger_background_cleanup_check()
852
-
853
- return output_file.name
854
-
855
-
856
- # Create global TTS manager instance
857
- text_to_speech_manager = TextToSpeechManager()
858
-
859
-
860
- # =============================================================================
861
- # UTILITY FUNCTIONS
862
- # =============================================================================
863
-
864
- def cleanup_expired_temporary_files():
865
- """
866
- Remove temporary files that have exceeded their lifetime.
867
-
868
- This function is called periodically to prevent disk space exhaustion
869
- from accumulated temporary audio files. Files older than
870
- TEMPORARY_FILE_LIFETIME_SECONDS are removed from disk and registry.
871
- """
872
- current_timestamp = time.time()
873
- expired_files = []
874
-
875
- with temporary_files_lock:
876
- # Identify expired files
877
- for file_path, creation_timestamp in list(temporary_files_registry.items()):
878
- if current_timestamp - creation_timestamp > TEMPORARY_FILE_LIFETIME_SECONDS:
879
- expired_files.append(file_path)
880
-
881
- # Remove expired files from disk and registry
882
- for file_path in expired_files:
883
- try:
884
- if os.path.exists(file_path):
885
- os.remove(file_path)
886
- del temporary_files_registry[file_path]
887
- except Exception:
888
- pass # Silently ignore deletion errors
889
 
890
-
891
- def validate_text_input(text_content):
892
- """
893
- Validate and clean text input for speech generation.
894
-
895
- Args:
896
- text_content: Raw text input from user
897
-
898
- Returns:
899
- tuple: (is_valid: bool, result: str)
900
- - If valid: (True, cleaned_text)
901
- - If invalid: (False, error_message or empty string)
902
- """
903
- # Check for None or non-string input
904
- if not text_content or not isinstance(text_content, str):
905
- return False, ""
906
-
907
- # Clean whitespace
908
- cleaned_text = text_content.strip()
909
-
910
- # Check for empty content
911
- if not cleaned_text:
912
- return False, ""
913
-
914
- # Check length constraint
915
- if len(cleaned_text) > MAXIMUM_INPUT_LENGTH:
916
- return False, f"Input exceeds maximum length of {MAXIMUM_INPUT_LENGTH} characters."
917
-
918
- return True, cleaned_text
919
-
920
-
921
- # =============================================================================
922
- # =============================================================================
923
-
924
- #
925
- # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
926
- # SPDX-License-Identifier: Apache-2.0
927
- #
928
-
929
- def check_if_generating():
930
- with generation_state_lock:
931
- return is_currently_generating
932
-
933
- # =============================================================================
934
- # =============================================================================
935
-
936
-
937
- def request_generation_stop():
938
- """
939
- Signal a request to stop the current generation.
940
-
941
- Returns:
942
- gr.update: Update to disable the stop button
943
- """
944
- global stop_generation_requested
945
- with generation_state_lock:
946
- stop_generation_requested = True
947
- return gr.update(interactive=False)
948
-
949
-
950
- # =============================================================================
951
- # SPEECH GENERATION FUNCTION
952
- # =============================================================================
953
-
954
- def perform_speech_generation(
955
- text_input,
956
- voice_mode_selection,
957
- voice_preset_selection,
958
- voice_clone_audio_file,
959
- model_variant,
960
- lsd_decode_steps,
961
- temperature,
962
- noise_clamp,
963
- eos_threshold,
964
- frames_after_eos,
965
- enable_custom_frames
966
- ):
967
- """
968
- Perform the complete speech generation workflow.
969
-
970
- This function orchestrates the entire generation process including:
971
- validation, model loading, voice state preparation, audio generation,
972
- and file saving. It handles thread safety and stop requests.
973
-
974
- Args:
975
- text_input: Text to convert to speech
976
- voice_mode_selection: "Preset Voices" or "Voice Cloning"
977
- voice_preset_selection: Selected preset voice name
978
- voice_clone_audio_file: Path to uploaded audio for cloning
979
- model_variant: Model variant identifier
980
- lsd_decode_steps: Number of LSD decode steps
981
- temperature: Generation temperature
982
- noise_clamp: Noise clamping value
983
- eos_threshold: End-of-sequence threshold
984
- frames_after_eos: Frames to generate after EOS
985
- enable_custom_frames: Whether to use custom frame count
986
-
987
- Returns:
988
- str or None: Path to generated audio file, or None if stopped
989
-
990
- Raises:
991
- gr.Error: On validation failure or generation error
992
- """
993
- global is_currently_generating, stop_generation_requested
994
-
995
- # Run cleanup before starting new generation
996
- if has_temporary_files_pending_cleanup():
997
- cleanup_expired_temporary_files()
998
-
999
- perform_memory_cleanup()
1000
-
1001
- # Validate text input
1002
- is_valid, validation_result = validate_text_input(text_input)
1003
-
1004
- if not is_valid:
1005
- if validation_result:
1006
- raise gr.Error(validation_result)
1007
- raise gr.Error("Please enter valid text to generate speech.")
1008
-
1009
- # Validate voice cloning audio if in clone mode
1010
- if voice_mode_selection == VOICE_MODE_CLONE:
1011
- if not voice_clone_audio_file:
1012
- raise gr.Error("Please upload an audio file for voice cloning.")
1013
- if not HF_TOKEN:
1014
- raise gr.Error("Voice cloning is not configured properly at the moment. Please try again later.")
1015
-
1016
- # Acquire generation lock
1017
- with generation_state_lock:
1018
- if is_currently_generating:
1019
- raise gr.Error("A generation is already in progress. Please wait.")
1020
- is_currently_generating = True
1021
- stop_generation_requested = False
1022
-
1023
- generated_audio_tensor = None
1024
- cloned_voice_state_tensor = None
1025
-
1026
- try:
1027
- # Load or retrieve cached model
1028
- text_to_speech_manager.load_or_get_model(
1029
- model_variant,
1030
- temperature,
1031
- lsd_decode_steps,
1032
- noise_clamp,
1033
- eos_threshold
1034
- )
1035
-
1036
- # Check for stop request after model loading
1037
- with generation_state_lock:
1038
- if stop_generation_requested:
1039
- return None
1040
-
1041
- # Prepare voice state based on mode
1042
- if voice_mode_selection == VOICE_MODE_CLONE:
1043
- cloned_voice_state_tensor = text_to_speech_manager.get_voice_state_for_clone(voice_clone_audio_file)
1044
- voice_state = cloned_voice_state_tensor
1045
-
1046
- else:
1047
- voice_state = text_to_speech_manager.get_voice_state_for_preset(voice_preset_selection)
1048
-
1049
- # Check for stop request after voice state preparation
1050
- with generation_state_lock:
1051
- if stop_generation_requested:
1052
- return None
1053
-
1054
- # Generate audio from text
1055
- generated_audio_tensor = text_to_speech_manager.generate_audio(
1056
- validation_result,
1057
- voice_state,
1058
- frames_after_eos,
1059
- enable_custom_frames
1060
- )
1061
-
1062
- # Check for stop request after generation
1063
- with generation_state_lock:
1064
- if stop_generation_requested:
1065
- return None
1066
-
1067
- # Save audio to temporary file
1068
- output_file_path = text_to_speech_manager.save_audio_to_file(generated_audio_tensor)
1069
-
1070
- return output_file_path
1071
-
1072
- except gr.Error:
1073
- raise
1074
-
1075
- except RuntimeError as runtime_error:
1076
- raise gr.Error(str(runtime_error))
1077
-
1078
- except Exception as generation_error:
1079
- raise gr.Error(f"Speech generation failed: {str(generation_error)}")
1080
-
1081
- finally:
1082
- # Always release generation lock
1083
- with generation_state_lock:
1084
- is_currently_generating = False
1085
- stop_generation_requested = False
1086
-
1087
- if generated_audio_tensor is not None:
1088
- del generated_audio_tensor
1089
- generated_audio_tensor = None
1090
-
1091
- if cloned_voice_state_tensor is not None:
1092
- del cloned_voice_state_tensor
1093
- cloned_voice_state_tensor = None
1094
-
1095
- memory_cleanup()
1096
-
1097
- trigger_background_cleanup_check()
1098
-
1099
-
1100
- # =============================================================================
1101
- # UI STATE MANAGEMENT FUNCTIONS
1102
- # =============================================================================
1103
-
1104
- def check_generate_button_state(text_content, ui_state):
1105
- """
1106
- Update generate button interactivity based on text validity and UI state.
1107
-
1108
- Args:
1109
- text_content: Current text input content
1110
- ui_state: Current UI state dictionary
1111
-
1112
- Returns:
1113
- gr.update: Update with interactive state
1114
- """
1115
-
1116
- if ui_state.get("generating", False):
1117
- return gr.update(interactive=False)
1118
-
1119
- is_valid, _ = validate_text_input(text_content)
1120
- return gr.update(interactive=is_valid)
1121
-
1122
-
1123
- def calculate_character_count_display(text_content):
1124
- """
1125
- Generate HTML for character count display with color coding.
1126
-
1127
- Args:
1128
- text_content: Current text input content
1129
-
1130
- Returns:
1131
- str: HTML string for character count display
1132
- """
1133
- character_count = len(text_content) if text_content else 0
1134
-
1135
- # Use error color if over limit
1136
- display_color = (
1137
- "var(--error-text-color)"
1138
- if character_count > MAXIMUM_INPUT_LENGTH
1139
- else "var(--body-text-color-subdued)"
1140
- )
1141
-
1142
- return f"<div style='text-align: right; padding: 4px 0;'><span style='color: {display_color}; font-size: 0.85em;'>{character_count} / {MAXIMUM_INPUT_LENGTH}</span></div>"
1143
-
1144
-
1145
- def determine_clear_button_visibility(text_content, audio_output, ui_state):
1146
- """
1147
- Determine clear button visibility based on content state and UI state.
1148
- Clear button is ALWAYS hidden during generation to prevent race conditions.
1149
-
1150
- Args:
1151
- text_content: Current text input content
1152
- audio_output: Current audio output value
1153
- ui_state: Current UI state dictionary
1154
-
1155
- Returns:
1156
- gr.update: Update with visibility state
1157
- """
1158
-
1159
- if ui_state.get("generating", False):
1160
- return gr.update(visible=False)
1161
-
1162
- has_text_content = bool(text_content and text_content.strip())
1163
- has_audio_output = audio_output is not None
1164
- should_show_clear = has_text_content or has_audio_output
1165
- return gr.update(visible=should_show_clear)
1166
-
1167
-
1168
- def update_voice_mode_visibility(voice_mode_value):
1169
- """
1170
- Update visibility of voice selection containers based on mode.
1171
-
1172
- Args:
1173
- voice_mode_value: Selected voice mode
1174
-
1175
- Returns:
1176
- tuple: (preset_container_update, clone_container_update)
1177
- """
1178
- if voice_mode_value == VOICE_MODE_CLONE:
1179
- return gr.update(visible=False), gr.update(visible=True)
1180
- else:
1181
- return gr.update(visible=True), gr.update(visible=False)
1182
-
1183
-
1184
- # =============================================================================
1185
- # =============================================================================
1186
-
1187
- #
1188
- # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
1189
- # SPDX-License-Identifier: Apache-2.0
1190
- #
1191
-
1192
- def switch_to_generating_state(ui_state):
1193
- new_state = {"generating": True}
1194
- return (
1195
- gr.update(visible=False),
1196
- gr.update(visible=True, interactive=True),
1197
- gr.update(visible=False),
1198
- new_state
1199
- )
1200
-
1201
- # =============================================================================
1202
- # =============================================================================
1203
-
1204
-
1205
- def switch_to_idle_state(text_content, audio_output, ui_state):
1206
- """
1207
- Switch UI back to idle state after generation.
1208
-
1209
- Args:
1210
- text_content: Current text input content
1211
- audio_output: Current audio output value
1212
- ui_state: Current UI state dictionary (will be updated to idle)
1213
-
1214
- Returns:
1215
- tuple: Updates for (generate_button, stop_button, clear_button, ui_state)
1216
- """
1217
- new_state = {"generating": False}
1218
-
1219
- has_text_content = bool(text_content and text_content.strip())
1220
- has_audio_output = audio_output is not None
1221
- should_show_clear = has_text_content or has_audio_output
1222
-
1223
- return (
1224
- gr.update(visible=True), # Show generate button
1225
- gr.update(visible=False), # Hide stop button
1226
- gr.update(visible=should_show_clear), # Show clear if content exists
1227
- new_state # Update state to idle
1228
- )
1229
-
1230
-
1231
- def perform_clear_action():
1232
- """
1233
- Clear all input and output fields.
1234
-
1235
- Returns:
1236
- tuple: Reset values for all clearable components
1237
- """
1238
- return (
1239
- "", # Clear text input
1240
- None, # Clear audio output
1241
- gr.update(visible=False), # Hide clear button
1242
- VOICE_MODE_PRESET, # Reset voice mode
1243
- DEFAULT_VOICE, # Reset voice preset
1244
- None # Clear clone audio
1245
- )
1246
-
1247
-
1248
- # =============================================================================
1249
- # EXAMPLE HANDLING FUNCTIONS
1250
- # =============================================================================
1251
-
1252
- def create_example_handler(example_text, example_voice):
1253
- """
1254
- Create a handler function for example button clicks.
1255
-
1256
- Args:
1257
- example_text: Example text to set
1258
- example_voice: Example voice to select
1259
-
1260
- Returns:
1261
- function: Handler that sets example values
1262
- """
1263
- def set_example_values():
1264
- return example_text, VOICE_MODE_PRESET, example_voice
1265
- return set_example_values
1266
-
1267
-
1268
- def format_example_button_label(example_text, example_voice, max_text_length=40):
1269
- """
1270
- Format example button label with voice and truncated text.
1271
-
1272
- Args:
1273
- example_text: Full example text
1274
- example_voice: Voice name
1275
- max_text_length: Maximum text length before truncation
1276
-
1277
- Returns:
1278
- str: Formatted button label
1279
- """
1280
- truncated_text = (
1281
- example_text[:max_text_length] + "..."
1282
- if len(example_text) > max_text_length
1283
- else example_text
1284
- )
1285
- return f"[{example_voice}] {truncated_text}"
1286
-
1287
-
1288
- start_background_cleanup_thread()
1289
-
1290
-
1291
- # =============================================================================
1292
- # GRADIO APPLICATION DEFINITION
1293
- # =============================================================================
1294
-
1295
- with gr.Blocks() as application:
1296
- ui_state = gr.State({"generating": False})
1297
-
1298
- # -------------------------------------------------------------------------
1299
- # SIDEBAR SECTION
1300
- # -------------------------------------------------------------------------
1301
- # Contains project information, description, and credits
1302
-
1303
- with gr.Sidebar():
1304
- gr.HTML(
1305
- """
1306
- <h1>Audio Generation Playground part of the
1307
- <a href="https://huggingface.co/spaces/hadadxyz/ai" target="_blank">
1308
- Demo Playground</a>, and the
1309
- <a href="https://huggingface.co/umint" target="_blank">
1310
- UltimaX Intelligence</a> project.</h1><br />
1311
-
1312
- This space runs the <b><a href="https://huggingface.co/kyutai/pocket-tts"
1313
- target="_blank">Pocket TTS</a></b> model from <b>Kyutai</b>.<br /><br />
1314
-
1315
- A lightweight text-to-speech (TTS) application designed to run
1316
- efficiently on CPUs. Forget about the hassle of using GPUs and
1317
- web APIs serving TTS models.<br /><br />
1318
-
1319
- Additionally, this space runs with a custom Docker image to
1320
- maximize the model's potential and has been optimized for the
1321
- limited scope of Hugging Face Spaces.<br /><br />
1322
-
1323
- ⚠️ This space was created entirely by the
1324
- <b><a href="https://huggingface.co/hadadrjt/JARVIS" target="_blank">
1325
- J.A.R.V.I.S.</a></b> model operating in autonomous agent mode.
1326
- All code was generated by AI without human review.<br /><br />
1327
-
1328
- This is an experimental space and is not part of production.
1329
- There may be minor bugs since the code was generated by AI.
1330
- However, none have been found so far.<br /><br />
1331
-
1332
- If you find a bug, please report it in the community tab.<br /><br />
1333
-
1334
- <b>Like this project? You can support me by buying a
1335
- <a href="https://ko-fi.com/hadad" target="_blank">coffee</a></b>
1336
- """
1337
- )
1338
-
1339
- # -------------------------------------------------------------------------
1340
- # AUDIO OUTPUT SECTION
1341
- # -------------------------------------------------------------------------
1342
-
1343
- audio_output_component = gr.Audio(
1344
- label="Generated Speech Output",
1345
- type="filepath",
1346
- interactive=False,
1347
- show_download_button=True
1348
- )
1349
-
1350
- # -------------------------------------------------------------------------
1351
- # VOICE SELECTION SECTION
1352
- # -------------------------------------------------------------------------
1353
-
1354
- with gr.Accordion("🎭 Voice Selection", open=True):
1355
- # Voice mode selector (preset vs cloning)
1356
- voice_mode_radio = gr.Radio(
1357
- label="Voice Mode",
1358
- choices=[VOICE_MODE_PRESET, VOICE_MODE_CLONE],
1359
- value=VOICE_MODE_PRESET,
1360
- info="Choose between preset voices or clone a voice from uploaded audio"
1361
- )
1362
-
1363
- # Container for preset voice selection
1364
- with gr.Column(visible=True) as preset_voice_container:
1365
- voice_preset_dropdown = gr.Dropdown(
1366
- label="Select Preset Voice",
1367
- choices=AVAILABLE_VOICES,
1368
- value=DEFAULT_VOICE
1369
- )
1370
-
1371
- # Container for voice cloning audio upload
1372
- with gr.Column(visible=False) as clone_voice_container:
1373
- voice_clone_audio_input = gr.Audio(
1374
- label="Upload Audio for Voice Cloning",
1375
- type="filepath"
1376
- )
1377
-
1378
- # -------------------------------------------------------------------------
1379
- # GENERATION PARAMETERS SECTION
1380
- # -------------------------------------------------------------------------
1381
-
1382
- with gr.Accordion("⚙️ Generation Parameters", open=False):
1383
- with gr.Row():
1384
- temperature_slider = gr.Slider(
1385
- label="Temperature",
1386
- minimum=0.1,
1387
- maximum=2.0,
1388
- step=0.05,
1389
- value=DEFAULT_TEMPERATURE,
1390
- info="Higher values produce more expressive speech"
1391
- )
1392
- lsd_decode_steps_slider = gr.Slider(
1393
- label="LSD Decode Steps",
1394
- minimum=1,
1395
- maximum=20,
1396
- step=1,
1397
- value=DEFAULT_LSD_DECODE_STEPS,
1398
- info="More steps may improve quality but slower"
1399
  )
1400
 
1401
- with gr.Row():
1402
- noise_clamp_slider = gr.Slider(
1403
- label="Noise Clamp",
1404
- minimum=0.0,
1405
- maximum=2.0,
1406
- step=0.05,
1407
- value=DEFAULT_NOISE_CLAMP,
1408
- info="Maximum noise sampling value (0 = disabled)"
1409
- )
1410
- eos_threshold_slider = gr.Slider(
1411
- label="End of Sequence Threshold",
1412
- minimum=-10.0,
1413
- maximum=0.0,
1414
- step=0.25,
1415
- value=DEFAULT_EOS_THRESHOLD,
1416
- info="Smaller values cause earlier completion"
1417
  )
1418
 
1419
- # -------------------------------------------------------------------------
1420
- # ADVANCED SETTINGS SECTION
1421
- # -------------------------------------------------------------------------
1422
-
1423
- with gr.Accordion("🔧 Advanced Settings", open=False):
1424
- model_variant_textbox = gr.Textbox(
1425
- label="Model Variant Identifier",
1426
- value=DEFAULT_MODEL_VARIANT,
1427
- info="Model signature for generation"
1428
- )
1429
-
1430
- with gr.Row():
1431
- enable_custom_frames_checkbox = gr.Checkbox(
1432
- label="Enable Custom Frames After EOS",
1433
- value=False,
1434
- info="Manually control post-EOS frame generation"
1435
- )
1436
- frames_after_eos_slider = gr.Slider(
1437
- label="Frames After EOS",
1438
- minimum=0,
1439
- maximum=100,
1440
- step=1,
1441
- value=DEFAULT_FRAMES_AFTER_EOS,
1442
- info="Additional frames after end-of-sequence (80ms per frame)"
1443
  )
1444
 
1445
- # -------------------------------------------------------------------------
1446
- # TEXT INPUT SECTION
1447
- # -------------------------------------------------------------------------
1448
-
1449
- text_input_component = gr.Textbox(
1450
- label="Prompt",
1451
- placeholder="Enter the text you want to convert to speech...",
1452
- lines=3,
1453
- max_lines=20,
1454
- max_length=MAXIMUM_INPUT_LENGTH,
1455
- autoscroll=True
1456
- )
1457
-
1458
- # Character count display
1459
- character_count_display = gr.HTML(
1460
- f"<div style='text-align: right; padding: 4px 0;'><span style='color: var(--body-text-color-subdued); font-size: 0.85em;'>0 / {MAXIMUM_INPUT_LENGTH}</span></div>"
1461
- )
1462
-
1463
- # -------------------------------------------------------------------------
1464
- # ACTION BUTTONS SECTION
1465
- # -------------------------------------------------------------------------
1466
-
1467
- # Primary generate button
1468
- generate_button = gr.Button(
1469
- "🎙️ Generate Speech",
1470
- variant="primary",
1471
- size="lg",
1472
- interactive=False
1473
- )
1474
-
1475
- # Stop button (visible during generation)
1476
- stop_button = gr.Button(
1477
- "⏹️ Stop Generation",
1478
- variant="stop",
1479
- size="lg",
1480
- visible=False
1481
- )
1482
-
1483
- # Clear button (visible when content exists)
1484
- clear_button = gr.Button(
1485
- "🗑️ Clear",
1486
- variant="secondary",
1487
- size="lg",
1488
- visible=False
1489
- )
1490
-
1491
- # -------------------------------------------------------------------------
1492
- # EXAMPLE PROMPTS SECTION
1493
- # -------------------------------------------------------------------------
1494
-
1495
- gr.HTML("""
1496
- <div style="padding: 16px 0 8px 0;">
1497
- <h3 style="margin: 0 0 8px 0; font-size: 1.1em;">💡 Example Prompts</h3>
1498
- <p style="margin: 0; opacity: 0.7; font-size: 0.9em;">Click any example to generate speech with its assigned voice</p>
1499
- </div>
1500
- """)
1501
-
1502
- # Create example buttons dynamically
1503
- example_buttons_list = []
1504
-
1505
- with gr.Row():
1506
- example_button_0 = gr.Button(
1507
- format_example_button_label(
1508
- EXAMPLE_PROMPTS_WITH_VOICES[0]["text"],
1509
- EXAMPLE_PROMPTS_WITH_VOICES[0]["voice"]
1510
- ),
1511
- size="sm",
1512
- variant="secondary"
1513
- )
1514
- example_buttons_list.append(example_button_0)
1515
-
1516
- example_button_1 = gr.Button(
1517
- format_example_button_label(
1518
- EXAMPLE_PROMPTS_WITH_VOICES[1]["text"],
1519
- EXAMPLE_PROMPTS_WITH_VOICES[1]["voice"]
1520
- ),
1521
- size="sm",
1522
- variant="secondary"
1523
- )
1524
- example_buttons_list.append(example_button_1)
1525
-
1526
- with gr.Row():
1527
- example_button_2 = gr.Button(
1528
- format_example_button_label(
1529
- EXAMPLE_PROMPTS_WITH_VOICES[2]["text"],
1530
- EXAMPLE_PROMPTS_WITH_VOICES[2]["voice"]
1531
- ),
1532
- size="sm",
1533
- variant="secondary"
1534
- )
1535
- example_buttons_list.append(example_button_2)
1536
-
1537
- example_button_3 = gr.Button(
1538
- format_example_button_label(
1539
- EXAMPLE_PROMPTS_WITH_VOICES[3]["text"],
1540
- EXAMPLE_PROMPTS_WITH_VOICES[3]["voice"]
1541
- ),
1542
- size="sm",
1543
- variant="secondary"
1544
- )
1545
- example_buttons_list.append(example_button_3)
1546
-
1547
- with gr.Row():
1548
- example_button_4 = gr.Button(
1549
- format_example_button_label(
1550
- EXAMPLE_PROMPTS_WITH_VOICES[4]["text"],
1551
- EXAMPLE_PROMPTS_WITH_VOICES[4]["voice"]
1552
- ),
1553
- size="sm",
1554
- variant="secondary"
1555
- )
1556
- example_buttons_list.append(example_button_4)
1557
 
1558
- # -------------------------------------------------------------------------
1559
- # EVENT HANDLERS AND BINDINGS
1560
- # -------------------------------------------------------------------------
1561
-
1562
- # Define input components list for generation function
1563
  generation_inputs = [
1564
  text_input_component,
1565
  voice_mode_radio,
@@ -1574,14 +236,15 @@ with gr.Blocks() as application:
1574
  enable_custom_frames_checkbox
1575
  ]
1576
 
1577
- # Voice mode change handler
1578
  voice_mode_radio.change(
1579
  fn=update_voice_mode_visibility,
1580
  inputs=[voice_mode_radio],
1581
- outputs=[preset_voice_container, clone_voice_container]
 
 
 
1582
  )
1583
 
1584
- # Text input change handlers
1585
  text_input_component.change(
1586
  fn=calculate_character_count_display,
1587
  inputs=[text_input_component],
@@ -1590,49 +253,54 @@ with gr.Blocks() as application:
1590
 
1591
  text_input_component.change(
1592
  fn=check_generate_button_state,
1593
- inputs=[text_input_component, ui_state],
 
 
 
1594
  outputs=[generate_button]
1595
  )
1596
 
1597
  text_input_component.change(
1598
  fn=determine_clear_button_visibility,
1599
- inputs=[text_input_component, audio_output_component, ui_state],
1600
- outputs=[clear_button]
1601
- )
1602
-
1603
- # Audio output change handler
1604
- audio_output_component.change(
1605
- fn=determine_clear_button_visibility,
1606
- inputs=[text_input_component, audio_output_component, ui_state],
1607
  outputs=[clear_button]
1608
  )
1609
 
1610
- # Generate button click handler chain
1611
  generate_button.click(
1612
  fn=switch_to_generating_state,
1613
  inputs=[ui_state],
1614
- outputs=[generate_button, stop_button, clear_button, ui_state]
 
 
 
 
 
1615
  ).then(
1616
  fn=perform_speech_generation,
1617
  inputs=generation_inputs,
1618
  outputs=[audio_output_component]
1619
  ).then(
1620
  fn=switch_to_idle_state,
1621
- inputs=[text_input_component, audio_output_component, ui_state],
1622
- outputs=[generate_button, stop_button, clear_button, ui_state]
1623
- ).then(
1624
- fn=check_generate_button_state,
1625
- inputs=[text_input_component, ui_state],
1626
- outputs=[generate_button]
 
 
 
 
1627
  )
1628
 
1629
- # Stop button handler
1630
  stop_button.click(
1631
  fn=request_generation_stop,
1632
  outputs=[stop_button]
1633
  )
1634
 
1635
- # Clear button handler
1636
  clear_button.click(
1637
  fn=perform_clear_action,
1638
  outputs=[
@@ -1645,39 +313,42 @@ with gr.Blocks() as application:
1645
  ]
1646
  )
1647
 
1648
- # Example button handlers
1649
  for button_index, example_button in enumerate(example_buttons_list):
1650
- example_text = EXAMPLE_PROMPTS_WITH_VOICES[button_index]["text"]
1651
- example_voice = EXAMPLE_PROMPTS_WITH_VOICES[button_index]["voice"]
1652
 
1653
  example_button.click(
1654
- fn=create_example_handler(example_text, example_voice),
1655
- outputs=[text_input_component, voice_mode_radio, voice_preset_dropdown]
1656
- ).then(
1657
  fn=switch_to_generating_state,
1658
  inputs=[ui_state],
1659
- outputs=[generate_button, stop_button, clear_button, ui_state]
 
 
 
 
 
 
 
 
 
 
 
 
1660
  ).then(
1661
  fn=perform_speech_generation,
1662
  inputs=generation_inputs,
1663
  outputs=[audio_output_component]
1664
  ).then(
1665
  fn=switch_to_idle_state,
1666
- inputs=[text_input_component, audio_output_component, ui_state],
1667
- outputs=[generate_button, stop_button, clear_button, ui_state]
1668
- ).then(
1669
- fn=check_generate_button_state,
1670
- inputs=[text_input_component, ui_state],
1671
- outputs=[generate_button]
 
 
 
 
1672
  )
1673
 
1674
-
1675
- # =============================================================================
1676
- # APPLICATION ENTRY POINT
1677
- # =============================================================================
1678
-
1679
- if __name__ == "__main__":
1680
- application.launch(
1681
- server_name="0.0.0.0",
1682
- share=False
1683
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  #
2
  # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
  # SPDX-License-Identifier: Apache-2.0
4
  #
5
 
6
+ import math
7
+ import torch
8
+ import gradio as gr
9
+ torch.set_num_threads(1)
10
+ torch.set_num_interop_threads(1)
11
+ from config import (
12
+ AVAILABLE_VOICES,
13
+ DEFAULT_VOICE,
14
+ DEFAULT_MODEL_VARIANT,
15
+ DEFAULT_TEMPERATURE,
16
+ DEFAULT_LSD_DECODE_STEPS,
17
+ DEFAULT_EOS_THRESHOLD,
18
+ DEFAULT_NOISE_CLAMP,
19
+ DEFAULT_FRAMES_AFTER_EOS,
20
+ MAXIMUM_INPUT_LENGTH,
21
+ VOICE_MODE_PRESET,
22
+ VOICE_MODE_CLONE,
23
+ EXAMPLE_PROMPTS
24
+ )
25
+ from src.core.authentication import authenticate_huggingface
26
+ authenticate_huggingface()
27
+ from src.core.memory import start_background_cleanup_thread
28
+ start_background_cleanup_thread()
29
+ from src.generation.handler import (
30
+ perform_speech_generation,
31
+ request_generation_stop
32
+ )
33
+ from src.ui.state import (
34
+ check_generate_button_state,
35
+ calculate_character_count_display,
36
+ determine_clear_button_visibility,
37
+ update_voice_mode_visibility
38
+ )
39
+ from src.ui.handlers import (
40
+ switch_to_generating_state,
41
+ switch_to_idle_state,
42
+ perform_clear_action,
43
+ create_example_handler,
44
+ format_example_button_label
45
+ )
46
+ from assets.css.styles import CSS
47
+ from assets.static.title import TITLE
48
+ from assets.static.header import HEADER
49
+ from assets.static.footer import FOOTER
50
+ from assets.static.sidebar import SIDEBAR
51
+
52
+ with gr.Blocks(css=CSS, fill_height=False, fill_width=True) as app:
53
+ ui_state = gr.State({"generating": False})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
 
55
+ with gr.Sidebar():
56
+ gr.HTML(SIDEBAR())
 
57
 
58
+ with gr.Column(elem_classes="header-section"):
59
+ gr.HTML(TITLE())
60
+ gr.HTML(HEADER())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
+ with gr.Row():
63
+ with gr.Column():
64
+ audio_output_component = gr.Audio(
65
+ label="Generated Speech Output",
66
+ type="filepath",
67
+ interactive=False,
68
+ autoplay=False
69
+ )
 
 
 
70
 
71
+ with gr.Accordion("Voice Selection", open=True):
72
+ voice_mode_radio = gr.Radio(
73
+ label="Voice Mode",
74
+ choices=[
75
+ VOICE_MODE_PRESET,
76
+ VOICE_MODE_CLONE
77
+ ],
78
+ value=VOICE_MODE_PRESET,
79
+ info="Choose between preset voices or clone a voice from uploaded audio",
80
+ elem_id="voice-mode"
81
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
 
83
+ with gr.Column(visible=True) as preset_voice_container:
84
+ voice_preset_dropdown = gr.Dropdown(
85
+ label="Select Preset Voice",
86
+ choices=AVAILABLE_VOICES,
87
+ value=DEFAULT_VOICE
88
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
 
90
+ with gr.Column(visible=False) as clone_voice_container:
91
+ voice_clone_audio_input = gr.Audio(
92
+ label="Upload Audio for Voice Cloning",
93
+ type="filepath"
94
+ )
95
 
96
+ with gr.Accordion("Model Parameters", open=False):
97
+ with gr.Row():
98
+ temperature_slider = gr.Slider(
99
+ label="Temperature",
100
+ minimum=0.1,
101
+ maximum=2.0,
102
+ step=0.05,
103
+ value=DEFAULT_TEMPERATURE,
104
+ info="Higher values produce more expressive speech"
105
+ )
106
+
107
+ lsd_decode_steps_slider = gr.Slider(
108
+ label="LSD Decode Steps",
109
+ minimum=1,
110
+ maximum=20,
111
+ step=1,
112
+ value=DEFAULT_LSD_DECODE_STEPS,
113
+ info="More steps may improve quality but slower"
114
+ )
115
 
116
+ with gr.Row():
117
+ noise_clamp_slider = gr.Slider(
118
+ label="Noise Clamp",
119
+ minimum=0.0,
120
+ maximum=2.0,
121
+ step=0.05,
122
+ value=DEFAULT_NOISE_CLAMP,
123
+ info="Maximum noise sampling value (0 = disabled)"
124
+ )
125
+
126
+ eos_threshold_slider = gr.Slider(
127
+ label="End of Sequence Threshold",
128
+ minimum=-10.0,
129
+ maximum=0.0,
130
+ step=0.25,
131
+ value=DEFAULT_EOS_THRESHOLD,
132
+ info="Smaller values cause earlier completion"
133
+ )
134
 
135
+ with gr.Accordion("Advanced Settings", open=False):
136
+ model_variant_textbox = gr.Textbox(
137
+ label="Model Variant Identifier",
138
+ value=DEFAULT_MODEL_VARIANT,
139
+ info="Model signature for generation"
140
+ )
141
 
142
+ with gr.Row():
143
+ enable_custom_frames_checkbox = gr.Checkbox(
144
+ label="Enable Custom Frames After EOS",
145
+ value=False,
146
+ info="Manually control post-EOS frame generation"
147
  )
148
 
149
+ frames_after_eos_slider = gr.Slider(
150
+ label="Frames After EOS",
151
+ minimum=0,
152
+ maximum=100,
153
+ step=1,
154
+ value=DEFAULT_FRAMES_AFTER_EOS,
155
+ info="Additional frames after end-of-sequence (80ms per frame)"
156
+ )
 
 
 
 
157
 
158
+ with gr.Column(scale=1):
159
+ text_input_component = gr.Textbox(
160
+ label="Prompt",
161
+ placeholder="Enter the text you want to convert to speech...",
162
+ lines=2,
163
+ max_lines=20,
164
+ max_length=MAXIMUM_INPUT_LENGTH,
165
+ autoscroll=True
166
  )
167
 
168
+ character_count_display = gr.HTML(
169
+ f"<div style='text-align: right; padding: 4px 0;'><span style='color: var(--body-text-color-subdued); font-size: 0.85em;'>0 / {MAXIMUM_INPUT_LENGTH}</span></div>",
170
+ visible=False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
172
 
173
+ generate_button = gr.Button(
174
+ "Generate",
175
+ variant="primary",
176
+ size="lg",
177
+ interactive=False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
178
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
 
180
+ stop_button = gr.Button(
181
+ "Stop",
182
+ variant="stop",
183
+ size="lg",
184
+ visible=False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185
  )
186
 
187
+ clear_button = gr.Button(
188
+ "Clear",
189
+ variant="secondary",
190
+ size="lg",
191
+ visible=False
 
 
 
 
 
 
 
 
 
 
 
192
  )
193
 
194
+ gr.HTML(
195
+ """
196
+ <div style="padding: 16px 0 8px 0;">
197
+ <h3 style="margin: 0 0 8px 0; font-size: 1.1em;">Example Prompts</h3>
198
+ <p style="margin: 0; opacity: 0.7; font-size: 0.9em;">Click any example to generate speech with its assigned voice</p>
199
+ </div>
200
+ """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
201
  )
202
 
203
+ example_buttons_list = []
204
+ num_examples = len(EXAMPLE_PROMPTS)
205
+ examples_per_row = 2
206
+ num_rows = math.ceil(num_examples / examples_per_row)
207
+
208
+ for row_idx in range(num_rows):
209
+ with gr.Row():
210
+ start_idx = row_idx * examples_per_row
211
+ end_idx = min(start_idx + examples_per_row, num_examples)
212
+ for i in range(start_idx, end_idx):
213
+ btn = gr.Button(
214
+ format_example_button_label(
215
+ EXAMPLE_PROMPTS[i]["text"],
216
+ EXAMPLE_PROMPTS[i]["voice"]
217
+ ),
218
+ size="sm",
219
+ variant="secondary"
220
+ )
221
+ example_buttons_list.append(btn)
222
+
223
+ gr.HTML(FOOTER())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
224
 
 
 
 
 
 
225
  generation_inputs = [
226
  text_input_component,
227
  voice_mode_radio,
 
236
  enable_custom_frames_checkbox
237
  ]
238
 
 
239
  voice_mode_radio.change(
240
  fn=update_voice_mode_visibility,
241
  inputs=[voice_mode_radio],
242
+ outputs=[
243
+ preset_voice_container,
244
+ clone_voice_container
245
+ ]
246
  )
247
 
 
248
  text_input_component.change(
249
  fn=calculate_character_count_display,
250
  inputs=[text_input_component],
 
253
 
254
  text_input_component.change(
255
  fn=check_generate_button_state,
256
+ inputs=[
257
+ text_input_component,
258
+ ui_state
259
+ ],
260
  outputs=[generate_button]
261
  )
262
 
263
  text_input_component.change(
264
  fn=determine_clear_button_visibility,
265
+ inputs=[
266
+ text_input_component,
267
+ ui_state
268
+ ],
 
 
 
 
269
  outputs=[clear_button]
270
  )
271
 
 
272
  generate_button.click(
273
  fn=switch_to_generating_state,
274
  inputs=[ui_state],
275
+ outputs=[
276
+ generate_button,
277
+ stop_button,
278
+ clear_button,
279
+ ui_state
280
+ ]
281
  ).then(
282
  fn=perform_speech_generation,
283
  inputs=generation_inputs,
284
  outputs=[audio_output_component]
285
  ).then(
286
  fn=switch_to_idle_state,
287
+ inputs=[
288
+ text_input_component,
289
+ ui_state
290
+ ],
291
+ outputs=[
292
+ generate_button,
293
+ stop_button,
294
+ clear_button,
295
+ ui_state
296
+ ]
297
  )
298
 
 
299
  stop_button.click(
300
  fn=request_generation_stop,
301
  outputs=[stop_button]
302
  )
303
 
 
304
  clear_button.click(
305
  fn=perform_clear_action,
306
  outputs=[
 
313
  ]
314
  )
315
 
 
316
  for button_index, example_button in enumerate(example_buttons_list):
317
+ example_text = EXAMPLE_PROMPTS[button_index]["text"]
318
+ example_voice = EXAMPLE_PROMPTS[button_index]["voice"]
319
 
320
  example_button.click(
 
 
 
321
  fn=switch_to_generating_state,
322
  inputs=[ui_state],
323
+ outputs=[
324
+ generate_button,
325
+ stop_button,
326
+ clear_button,
327
+ ui_state
328
+ ]
329
+ ).then(
330
+ fn=create_example_handler(example_text, example_voice),
331
+ outputs=[
332
+ text_input_component,
333
+ voice_mode_radio,
334
+ voice_preset_dropdown
335
+ ]
336
  ).then(
337
  fn=perform_speech_generation,
338
  inputs=generation_inputs,
339
  outputs=[audio_output_component]
340
  ).then(
341
  fn=switch_to_idle_state,
342
+ inputs=[
343
+ text_input_component,
344
+ ui_state
345
+ ],
346
+ outputs=[
347
+ generate_button,
348
+ stop_button,
349
+ clear_button,
350
+ ui_state
351
+ ]
352
  )
353
 
354
+ app.launch(server_name="0.0.0.0")
 
 
 
 
 
 
 
 
 
assets/css/styles.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #
2
+ # https://huggingface.co/spaces/D3vShoaib/pocket-tts
3
+ #
4
+
5
+ CSS = """
6
+ footer {
7
+ visibility: hidden
8
+ }
9
+
10
+ .gradio-container {
11
+ max-width: 100% !important;
12
+ padding: 0 !important;
13
+ }
14
+
15
+ @media (min-width: 768px) {
16
+ .gradio-container {
17
+ padding-left: 2% !important;
18
+ padding-right: 2% !important;
19
+ }
20
+ }
21
+
22
+ .header-section {
23
+ text-align: left;
24
+ margin-bottom: 1.5rem;
25
+ }
26
+
27
+ .main-title {
28
+ color: #10b981;
29
+ font-weight: 800;
30
+ font-size: 1.8rem;
31
+ margin: 5px 0;
32
+ }
33
+
34
+ @media (min-width: 768px) {
35
+ .main-title {
36
+ font-size: 2.2rem;
37
+ }
38
+ }
39
+
40
+ .logo-container {
41
+ display: flex;
42
+ justify-content: flex-start;
43
+ align-items: center;
44
+ gap: 10px;
45
+ margin-bottom: 0;
46
+ }
47
+
48
+ .logo-img {
49
+ height: 40px;
50
+ border-radius: 8px;
51
+ }
52
+
53
+ @media (min-width: 768px) {
54
+ .logo-img {
55
+ height: 50px;
56
+ }
57
+
58
+ .logo-container {
59
+ gap: 15px;
60
+ }
61
+ }
62
+
63
+ .links-row {
64
+ display: flex;
65
+ flex-wrap: wrap;
66
+ justify-content: flex-start;
67
+ gap: 8px;
68
+ margin: 5px 0 10px 0;
69
+ font-size: 0.85rem;
70
+ }
71
+
72
+ @media (min-width: 768px) {
73
+ .links-row {
74
+ gap: 10px;
75
+ font-size: 0.9rem;
76
+ }
77
+ }
78
+
79
+ .links-row a {
80
+ color: #10b981;
81
+ text-decoration: none;
82
+ padding: 3px 10px;
83
+ border: 1px solid #10b981;
84
+ border-radius: 15px;
85
+ transition: all 0.2s;
86
+ white-space: nowrap;
87
+ }
88
+
89
+ .links-row a:hover {
90
+ background-color: #10b981;
91
+ color: white;
92
+ }
93
+
94
+ .disclaimer {
95
+ text-align: center;
96
+ font-size: 0.8rem;
97
+ color: #9ca3af;
98
+ margin-top: 30px;
99
+ padding: 15px;
100
+ border-top: 1px solid currentColor;
101
+ }
102
+
103
+ @media (min-width: 768px) {
104
+ .disclaimer {
105
+ margin-top: 40px;
106
+ padding: 20px;
107
+ }
108
+ }
109
+
110
+ #voice-mode .wrap {
111
+ display: flex !important;
112
+ flex-direction: row !important;
113
+ width: 100% !important;
114
+ }
115
+
116
+ #voice-mode .wrap label {
117
+ flex: 1 !important;
118
+ justify-content: center !important;
119
+ text-align: center !important;
120
+ }
121
+ """
assets/static/footer.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ from config import (
7
+ COPYRIGHT_NAME,
8
+ COPYRIGHT_URL,
9
+ DESIGN_BY_NAME,
10
+ DESIGN_BY_URL
11
+ )
12
+
13
def FOOTER():
    """Render the page footer: copyright, design credit, and a disclaimer.

    Returns:
        str: an HTML fragment interpolating names/URLs from ``config``.
    """
    # Bug fix: the original anchors carried `target="_blank"` twice,
    # which is an invalid duplicated HTML attribute.
    return f"""
    <div class="disclaimer" style="font-size: 10px; line-height: 1.4;">
        <br>
        <p style="opacity: 0.8;">
            Copyright © 2026
            <a href="{COPYRIGHT_URL}" target="_blank" style="color: #10b981; text-decoration: none;">
                {COPYRIGHT_NAME}
            </a>,
            design inspired by
            <a href="{DESIGN_BY_URL}" target="_blank" style="color: #10b981; text-decoration: none;">
                {DESIGN_BY_NAME}
            </a>.
        </p>

        <p style="font-size: 8px; opacity: 0.7;">
            ⚠️ This Space is not affiliated with Kyutai TTS and is provided for demonstration purposes only.
        </p>
    </div>
    """
assets/static/header.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ from config import HEADER_LINKS
7
+
8
def HEADER():
    """Render the row of external links configured in ``HEADER_LINKS``.

    Returns:
        str: an HTML fragment containing one anchor per configured link.
    """
    # Each anchor keeps its trailing newline, matching the accumulated form.
    anchors = "".join(
        f'<a href="{link["url"]}" target="_blank">{link["icon"]} {link["text"]}</a>\n'
        for link in HEADER_LINKS
    )

    return f"""
    <div class="links-row">
        {anchors}
    </div>
    """
assets/static/sidebar.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
def SIDEBAR():
    """Render the static sidebar describing this Space and its credits.

    Returns:
        str: a constant HTML fragment (no interpolation is performed).
    """
    # Fixes: the original used an f-string with no placeholders (Ruff F541)
    # and repeated `target="_blank"` on every anchor (invalid duplicate
    # HTML attribute); each anchor now carries the attribute exactly once.
    return """
    <h1>
        Audio Generation Playground part of the
        <a href="https://huggingface.co/spaces/hadadxyz/ai"
           target="_blank" style="color: #10b981; text-decoration: none;">
            Demo Playground
        </a>,
        and the
        <a href="https://huggingface.co/umint"
           target="_blank" style="color: #10b981; text-decoration: none;">
            UltimaX Intelligence
        </a>
        project.
    </h1><br />

    <p>
        This Space runs the
        <b>
            <a href="https://huggingface.co/kyutai/pocket-tts"
               target="_blank" style="color: #10b981; text-decoration: none;">
                Pocket TTS
            </a>
        </b>
        model from <b>Kyutai</b>.<br /><br />

        A lightweight text-to-speech (TTS) application designed to run
        efficiently on CPUs. Forget about the hassle of using GPUs and
        web APIs serving TTS models.<br /><br />

        Additionally, this Space uses a custom Docker image to
        maximize model performance and is optimized for the
        constraints of Hugging Face Spaces.
    </p><br />

    <p>
        <b>Like this project?</b> You can support me by buying a
        <a href="https://ko-fi.com/hadad"
           target="_blank" style="color: #10b981; text-decoration: none;">
            coffee
        </a>.
    </p>
    """
assets/static/title.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ from config import KYUTAI_LOGO_URL, POCKET_TTS_LOGO_URL, SPACE_INFO
7
+
8
def TITLE():
    """Render the banner: both project logos plus the Space title.

    Interpolates logo URLs and the title text from ``config``.
    """
    return f"""
    <div class="logo-container">
        <img src="{KYUTAI_LOGO_URL}" class="logo-img" alt="Kyutai Logo">
        <img src="{POCKET_TTS_LOGO_URL}" class="logo-img" alt="PocketTTS Logo">
        <h1 class='main-title'>{SPACE_INFO}</h1>
    </div>
    """
config.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#
# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
# SPDX-License-Identifier: Apache-2.0
#

import os

# Hugging Face access token read from the environment; required for gated
# features (voice cloning). None when not configured.
HF_TOKEN = os.getenv("HF_TOKEN", None)

# Preset speaker identities shipped with the Pocket TTS model.
AVAILABLE_VOICES = [
    "alba",
    "marius",
    "javert",
    "jean",
    "fantine",
    "cosette",
    "eponine",
    "azelma"
]

# Defaults for the generation controls exposed in the UI.
DEFAULT_VOICE = "alba"
DEFAULT_MODEL_VARIANT = "b6369a24"
DEFAULT_TEMPERATURE = 0.7
DEFAULT_LSD_DECODE_STEPS = 1
DEFAULT_EOS_THRESHOLD = -4.0
DEFAULT_NOISE_CLAMP = 0.0
DEFAULT_FRAMES_AFTER_EOS = 10

# Labels for the two voice-selection radio options.
VOICE_MODE_PRESET = "Preset Voices"
VOICE_MODE_CLONE = "Voice Cloning"

# LRU cache sizing for cached voice-state tensors.
VOICE_STATE_CACHE_MAXIMUM_SIZE = 8
VOICE_STATE_CACHE_CLEANUP_THRESHOLD = 4

# Seconds between idle passes of the background cleanup thread.
BACKGROUND_CLEANUP_INTERVAL = 300

# Maximum number of characters accepted in the prompt textbox.
MAXIMUM_INPUT_LENGTH = 1000

# Generated/converted WAV files are deleted after this many seconds.
TEMPORARY_FILE_LIFETIME_SECONDS = 7200

# Process memory budget (bytes) and derived thresholds used by src.core.memory.
MAXIMUM_MEMORY_USAGE = 1 * 1024 * 1024 * 1024
MEMORY_WARNING_THRESHOLD = int(0.7 * MAXIMUM_MEMORY_USAGE)
MEMORY_CRITICAL_THRESHOLD = int(0.85 * MAXIMUM_MEMORY_USAGE)
MEMORY_CHECK_INTERVAL = 30
MEMORY_IDLE_TARGET = int(0.5 * MAXIMUM_MEMORY_USAGE)

# Clickable example prompts shown in the UI, each bound to a preset voice.
EXAMPLE_PROMPTS = [
    {
        "text": "The quick brown fox jumps over the lazy dog near the riverbank.",
        "voice": "alba"
    },
    {
        "text": "Welcome to the future of text to speech technology powered by artificial intelligence.",
        "voice": "marius"
    },
    {
        "text": "Technology continues to push the boundaries of what we thought was possible.",
        "voice": "javert"
    },
    {
        "text": "The weather today is absolutely beautiful and perfect for a relaxing walk outside.",
        "voice": "fantine"
    },
    {
        "text": "Science and innovation are transforming how we interact with the world around us.",
        "voice": "jean"
    }
]

# Branding assets and metadata used by the static HTML fragments.
KYUTAI_LOGO_URL = "https://cdn-avatars.huggingface.co/v1/production/uploads/6355a3c1805be5a8f30fea49/8xGdIOlfkopZfhbMitw_k.jpeg"
POCKET_TTS_LOGO_URL = "https://raw.githubusercontent.com/kyutai-labs/pocket-tts/refs/heads/main/docs/logo.png"

SPACE_INFO = "Pocket TTS"

# External links rendered in the header row.
HEADER_LINKS = [
    {"icon": "🔊", "text": "Demo", "url": "https://kyutai.org/tts"},
    {"icon": "🐱‍💻", "text": "GitHub", "url": "https://github.com/kyutai-labs/pocket-tts"},
    {"icon": "🤗", "text": "Model Card", "url": "https://huggingface.co/kyutai/pocket-tts"},
    {"icon": "🤗", "text": "Space", "url": "https://huggingface.co/spaces/hadadxyz/pocket-tts-hf-cpu-optimized"},
    {"icon": "📄", "text": "Paper", "url": "https://arxiv.org/abs/2509.06926"},
    {"icon": "📚", "text": "Docs", "url": "https://github.com/kyutai-labs/pocket-tts/tree/main/docs"},
]

# Footer attribution.
COPYRIGHT_NAME = "Hadad Darajat"
COPYRIGHT_URL = "https://www.linkedin.com/in/hadadrjt"

DESIGN_BY_NAME = "D3vShoaib/pocket-tts"
DESIGN_BY_URL = f"https://huggingface.co/spaces/{DESIGN_BY_NAME}"
src/audio/converter.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ import time
7
+ import tempfile
8
+ import numpy as np
9
+ import scipy.io.wavfile
10
+ from ..core.state import temporary_files_registry, temporary_files_lock
11
+ from ..core.memory import trigger_background_cleanup_check
12
+
13
def convert_audio_to_pcm_wav(input_path):
    """Convert a WAV file of any sample format to 16-bit PCM.

    The converted copy is written to a new temporary file which is
    registered for delayed cleanup. On any failure the original path is
    returned unchanged (best-effort conversion).

    Args:
        input_path: path to the source WAV file.

    Returns:
        Path to the converted 16-bit PCM WAV, or ``input_path`` on error.
    """
    try:
        sample_rate, audio_data = scipy.io.wavfile.read(input_path)

        if audio_data.dtype in (np.float32, np.float64):
            # Float WAV data is nominally in [-1, 1]; clip then scale to int16.
            audio_data = np.clip(audio_data, -1.0, 1.0)
            audio_data = (audio_data * 32767).astype(np.int16)

        elif audio_data.dtype == np.int32:
            # Keep the most significant 16 bits of 32-bit samples.
            audio_data = (audio_data >> 16).astype(np.int16)

        elif audio_data.dtype == np.uint8:
            # 8-bit WAV is unsigned; recenter around zero and widen.
            audio_data = ((audio_data.astype(np.int16) - 128) * 256).astype(np.int16)

        elif audio_data.dtype != np.int16:
            audio_data = audio_data.astype(np.int16)

        # Bug fix: the original never closed the NamedTemporaryFile handle,
        # leaking one file descriptor per conversion. We only need the
        # unique path; scipy writes to the path, not the open handle.
        with tempfile.NamedTemporaryFile(suffix="_converted.wav", delete=False) as output_file:
            output_path = output_file.name

        scipy.io.wavfile.write(output_path, sample_rate, audio_data)

        # Register for delayed deletion by the background cleanup thread.
        with temporary_files_lock:
            temporary_files_registry[output_path] = time.time()

        trigger_background_cleanup_check()

        return output_path

    except Exception as conversion_error:
        print(f"Warning: {conversion_error}")
        return input_path
src/core/authentication.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ from config import HF_TOKEN
7
+ from huggingface_hub import login
8
+
9
def authenticate_huggingface():
    """Log in to Hugging Face when a token is configured; print status."""
    if not HF_TOKEN:
        # No token: voice cloning (gated model access) cannot be enabled.
        print("Missing Hugging Face authentication required for the license agreement")
        return

    try:
        login(token=HF_TOKEN, add_to_git_credential=False)
        print("Authenticated with Hugging Face")
    except Exception as authentication_error:
        print(f"Hugging Face authentication failed: {authentication_error}")
        print("Voice cloning may not be available")

def get_huggingface_token():
    """Return the configured Hugging Face token, or None when absent."""
    return HF_TOKEN
src/core/memory.py ADDED
@@ -0,0 +1,359 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ import os
7
+ import gc
8
+ import time
9
+ import atexit
10
+ import threading
11
+ import torch
12
+ from config import (
13
+ TEMPORARY_FILE_LIFETIME_SECONDS,
14
+ BACKGROUND_CLEANUP_INTERVAL,
15
+ MEMORY_WARNING_THRESHOLD,
16
+ MEMORY_CRITICAL_THRESHOLD,
17
+ MEMORY_CHECK_INTERVAL,
18
+ MEMORY_IDLE_TARGET,
19
+ MAXIMUM_MEMORY_USAGE
20
+ )
21
+ from ..core.state import (
22
+ temporary_files_registry,
23
+ temporary_files_lock,
24
+ memory_enforcement_lock,
25
+ background_cleanup_thread,
26
+ background_cleanup_stop_event,
27
+ background_cleanup_trigger_event,
28
+ check_if_generation_is_currently_active,
29
+ get_text_to_speech_manager
30
+ )
31
+
32
def get_current_memory_usage():
    """Best-effort resident memory of this process in bytes (0 if unknown)."""
    # Preferred source: VmRSS from /proc/self/status (Linux only).
    try:
        with open('/proc/self/status', 'r') as status_file:
            for status_line in status_file:
                if status_line.startswith('VmRSS:'):
                    return int(status_line.split()[1]) * 1024
    except Exception:
        pass

    # Fallback: resident page count from /proc/self/statm.
    try:
        with open('/proc/self/statm', 'r') as statm_file:
            fields = statm_file.read().split()
            return int(fields[1]) * os.sysconf('SC_PAGE_SIZE')
    except Exception:
        pass

    # Last resort: peak (not current) RSS via getrusage.
    # ru_maxrss is reported in bytes on macOS and kilobytes elsewhere.
    try:
        import resource
        import platform
        peak_rss = resource.getrusage(resource.RUSAGE_SELF).ru_maxrss

        if platform.system() == "Darwin":
            return peak_rss
        return peak_rss * 1024
    except Exception:
        pass

    return 0
67
+
68
def is_memory_usage_within_limit():
    """True while resident memory is below the hard cap."""
    return get_current_memory_usage() < MAXIMUM_MEMORY_USAGE

def is_memory_usage_approaching_limit():
    """True once resident memory has reached the warning threshold."""
    return get_current_memory_usage() >= MEMORY_WARNING_THRESHOLD

def is_memory_usage_critical():
    """True once resident memory has reached the critical threshold."""
    return get_current_memory_usage() >= MEMORY_CRITICAL_THRESHOLD

def is_memory_above_idle_target():
    """True while resident memory exceeds the idle-time target."""
    return get_current_memory_usage() > MEMORY_IDLE_TARGET
83
+
84
def force_garbage_collection():
    """Run a full generational GC pass and flush any CUDA caches."""
    for gc_generation in (0, 1, 2):
        gc.collect(gc_generation)

    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        torch.cuda.synchronize()

def memory_cleanup():
    """Aggressive cleanup: GC, ask glibc to return freed arenas, GC again."""
    force_garbage_collection()

    try:
        # malloc_trim is glibc-specific; silently skip on other libcs/OSes.
        import ctypes
        ctypes.CDLL("libc.so.6").malloc_trim(0)
    except Exception:
        pass

    force_garbage_collection()
105
+
106
def perform_memory_cleanup():
    """Routine cleanup between generations: GC, evict LRU voice states, trim."""
    force_garbage_collection()

    manager = get_text_to_speech_manager()
    if manager is not None:
        manager.evict_least_recently_used_voice_states()

    memory_cleanup()
114
+
115
def cleanup_expired_temporary_files():
    """Delete and deregister temporary files older than their lifetime."""
    now = time.time()

    with temporary_files_lock:
        expired_paths = [
            path
            for path, created_at in list(temporary_files_registry.items())
            if now - created_at > TEMPORARY_FILE_LIFETIME_SECONDS
        ]

        for path in expired_paths:
            try:
                if os.path.exists(path):
                    os.remove(path)
                del temporary_files_registry[path]
            except Exception:
                # Best-effort: a failed delete keeps the registry entry
                # so the next cycle can retry.
                pass
132
+
133
def cleanup_all_temporary_files_immediately():
    """Delete every registered temporary file regardless of age."""
    with temporary_files_lock:
        for path in list(temporary_files_registry.keys()):
            try:
                if os.path.exists(path):
                    os.remove(path)
                del temporary_files_registry[path]
            except Exception:
                pass
143
+
144
def has_temporary_files_pending_cleanup():
    """True if at least one tracked temporary file has outlived its lifetime."""
    with temporary_files_lock:
        if not temporary_files_registry:
            return False

        now = time.time()
        return any(
            now - created_at > TEMPORARY_FILE_LIFETIME_SECONDS
            for created_at in temporary_files_registry.values()
        )
156
+
157
def has_any_temporary_files_registered():
    """True if any temporary files are currently being tracked."""
    with temporary_files_lock:
        return bool(temporary_files_registry)
160
+
161
def calculate_time_until_next_file_expiration():
    """Seconds until the next tracked temporary file expires.

    Returns None when nothing is tracked, and 0 as soon as any file is
    already past its lifetime.
    """
    with temporary_files_lock:
        if not temporary_files_registry:
            return None

        now = time.time()
        soonest = None

        for created_at in temporary_files_registry.values():
            remaining = TEMPORARY_FILE_LIFETIME_SECONDS - (now - created_at)

            if remaining <= 0:
                # Already expired: cleanup should run immediately.
                return 0

            if soonest is None or remaining < soonest:
                soonest = remaining

        return soonest
180
+
181
def enforce_memory_limit_if_exceeded():
    """Escalating cleanup until memory is back under budget.

    The ladder is: GC -> evict LRU voice states -> drop the whole voice
    cache + temp files -> (only when idle) unload the model entirely.
    After each rung the usage is re-measured and the function returns
    early once the relevant threshold is satisfied.

    Returns:
        bool: True when usage ended below the applicable limit.
    """
    with memory_enforcement_lock:
        # Snapshot once: an active generation forbids unloading the model.
        generation_is_active = check_if_generation_is_currently_active()

        current_memory_usage = get_current_memory_usage()

        if current_memory_usage < MEMORY_WARNING_THRESHOLD:
            return True

        # Rung 1: plain garbage collection.
        force_garbage_collection()
        current_memory_usage = get_current_memory_usage()

        if current_memory_usage < MEMORY_WARNING_THRESHOLD:
            return True

        # Rung 2: evict least-recently-used cached voice states.
        tts_manager = get_text_to_speech_manager()
        if tts_manager is not None:
            tts_manager.evict_least_recently_used_voice_states()

        memory_cleanup()
        current_memory_usage = get_current_memory_usage()

        if current_memory_usage < MEMORY_CRITICAL_THRESHOLD:
            return True

        # Rung 3: drop the entire voice-state cache and all temp files.
        if tts_manager is not None:
            tts_manager.clear_voice_state_cache_completely()

        cleanup_all_temporary_files_immediately()
        memory_cleanup()
        current_memory_usage = get_current_memory_usage()

        if current_memory_usage < MAXIMUM_MEMORY_USAGE:
            return True

        # While generating we must not unload the model; report honestly.
        if generation_is_active:
            return current_memory_usage < MAXIMUM_MEMORY_USAGE

        # Rung 4 (idle only): unload the model completely.
        if tts_manager is not None:
            tts_manager.unload_model_completely()

        memory_cleanup()
        current_memory_usage = get_current_memory_usage()

        return current_memory_usage < MAXIMUM_MEMORY_USAGE
226
+
227
def perform_idle_memory_reduction():
    """Shrink memory toward MEMORY_IDLE_TARGET while the app is idle.

    Uses the same escalation ladder as enforce_memory_limit_if_exceeded,
    but re-checks the generation flag before every destructive step and
    aborts as soon as a generation starts, so in-flight work is never
    disturbed.
    """
    if check_if_generation_is_currently_active():
        return

    with memory_enforcement_lock:
        current_memory_usage = get_current_memory_usage()

        if current_memory_usage <= MEMORY_IDLE_TARGET:
            return

        # Step 1: plain garbage collection.
        force_garbage_collection()
        current_memory_usage = get_current_memory_usage()

        if current_memory_usage <= MEMORY_IDLE_TARGET:
            return

        # A generation may have started while we were collecting.
        if check_if_generation_is_currently_active():
            return

        # Step 2: evict least-recently-used voice states.
        tts_manager = get_text_to_speech_manager()
        if tts_manager is not None:
            tts_manager.evict_least_recently_used_voice_states()

        memory_cleanup()
        current_memory_usage = get_current_memory_usage()

        if current_memory_usage <= MEMORY_IDLE_TARGET:
            return

        if check_if_generation_is_currently_active():
            return

        # Step 3: drop the entire voice-state cache.
        if tts_manager is not None:
            tts_manager.clear_voice_state_cache_completely()

        memory_cleanup()
        current_memory_usage = get_current_memory_usage()

        if current_memory_usage <= MEMORY_IDLE_TARGET:
            return

        if check_if_generation_is_currently_active():
            return

        # Step 4: unload the model entirely (it reloads on next request).
        if tts_manager is not None:
            tts_manager.unload_model_completely()

        memory_cleanup()
275
+
276
def perform_background_cleanup_cycle():
    """Worker loop for the background cleanup thread.

    Alternates between three duties until the stop event is set:
    deleting expired temporary files (waking just after the next file's
    expiry), periodic memory checks every MEMORY_CHECK_INTERVAL seconds,
    and idle memory reduction when nothing is tracked or running.
    """
    last_memory_check_timestamp = 0

    while not background_cleanup_stop_event.is_set():
        time_until_next_expiration = calculate_time_until_next_file_expiration()
        current_timestamp = time.time()
        time_since_last_memory_check = current_timestamp - last_memory_check_timestamp

        if time_until_next_expiration is not None:
            # Files are tracked: sleep until the next one expires, but never
            # longer than either periodic interval.
            if time_until_next_expiration <= 0:
                wait_duration = 1
            else:
                wait_duration = min(
                    time_until_next_expiration + 1,
                    MEMORY_CHECK_INTERVAL,
                    BACKGROUND_CLEANUP_INTERVAL
                )
        else:
            if is_memory_above_idle_target() and not check_if_generation_is_currently_active():
                # Nothing to delete but memory is high: check again soon.
                wait_duration = MEMORY_CHECK_INTERVAL
            else:
                # Fully idle: block on the trigger event so new temp-file
                # registrations wake us immediately.
                background_cleanup_trigger_event.clear()
                triggered = background_cleanup_trigger_event.wait(timeout=BACKGROUND_CLEANUP_INTERVAL)

                if background_cleanup_stop_event.is_set():
                    break

                if triggered:
                    # Woken by a registration: recompute schedule right away.
                    continue
                else:
                    # Timed out while idle: opportunistically shrink memory.
                    if not check_if_generation_is_currently_active():
                        perform_idle_memory_reduction()
                    continue

        background_cleanup_stop_event.wait(timeout=wait_duration)

        if background_cleanup_stop_event.is_set():
            break

        if has_temporary_files_pending_cleanup():
            cleanup_expired_temporary_files()

        current_timestamp = time.time()
        time_since_last_memory_check = current_timestamp - last_memory_check_timestamp

        # Periodic memory pass, skipped entirely while a generation runs.
        if time_since_last_memory_check >= MEMORY_CHECK_INTERVAL:
            if not check_if_generation_is_currently_active():
                if is_memory_usage_critical():
                    enforce_memory_limit_if_exceeded()
                elif is_memory_above_idle_target():
                    perform_idle_memory_reduction()

            last_memory_check_timestamp = current_timestamp
329
+
330
def trigger_background_cleanup_check():
    """Wake the background cleanup thread for an immediate pass."""
    background_cleanup_trigger_event.set()
332
+
333
def start_background_cleanup_thread():
    """Start the daemon cleanup worker if it is not already running.

    The thread handle is stored on the shared state module (not on this
    module) so every importer observes the same instance.
    """
    # Bug fix: the original declared `global background_cleanup_thread`
    # but never rebound that name - it mutates the attribute on the state
    # module instead. The dead declaration was misleading and is removed.
    from ..core import state as global_state

    if global_state.background_cleanup_thread is None or not global_state.background_cleanup_thread.is_alive():
        # Reset control events so a restarted worker begins in a clean state.
        background_cleanup_stop_event.clear()
        background_cleanup_trigger_event.clear()

        global_state.background_cleanup_thread = threading.Thread(
            target=perform_background_cleanup_cycle,
            daemon=True,
            name="BackgroundCleanupThread"
        )

        global_state.background_cleanup_thread.start()
349
+
350
def stop_background_cleanup_thread():
    """Signal the cleanup worker to exit and wait briefly for it."""
    from ..core import state as global_state

    # Setting both events unblocks whichever wait the worker is in.
    background_cleanup_stop_event.set()
    background_cleanup_trigger_event.set()

    worker = global_state.background_cleanup_thread
    if worker is not None and worker.is_alive():
        worker.join(timeout=5)

# Make sure the worker is stopped on interpreter shutdown.
atexit.register(stop_background_cleanup_thread)
src/core/state.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#
# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
# SPDX-License-Identifier: Apache-2.0
#

import threading

# Guards the two generation flags below.
generation_state_lock = threading.Lock()
is_currently_generating = False
stop_generation_requested = False

# Maps temporary file path -> creation timestamp; guarded by temporary_files_lock.
temporary_files_registry = {}
temporary_files_lock = threading.Lock()

# Serializes memory-limit enforcement so cleanup passes do not interleave.
memory_enforcement_lock = threading.Lock()

# Background cleanup worker handle and its control events.
background_cleanup_thread = None
background_cleanup_stop_event = threading.Event()
background_cleanup_trigger_event = threading.Event()

# Process-wide TextToSpeechManager singleton, registered at startup.
text_to_speech_manager = None

def set_text_to_speech_manager(manager_instance):
    """Register the process-wide TextToSpeechManager instance."""
    global text_to_speech_manager
    text_to_speech_manager = manager_instance

def get_text_to_speech_manager():
    """Return the registered TextToSpeechManager, or None if unset."""
    # Fix: the original declared `global` here, which is never needed to
    # read a module-level name and wrongly suggested a write occurs.
    return text_to_speech_manager

def check_if_generation_is_currently_active():
    """Thread-safe read of the generation-in-progress flag."""
    with generation_state_lock:
        return is_currently_generating

def set_generation_active(is_active):
    """Thread-safe write of the generation-in-progress flag."""
    global is_currently_generating
    with generation_state_lock:
        is_currently_generating = is_active

def set_stop_generation_requested(requested):
    """Thread-safe write of the stop-requested flag."""
    global stop_generation_requested
    with generation_state_lock:
        stop_generation_requested = requested

def get_stop_generation_requested():
    """Thread-safe read of the stop-requested flag."""
    with generation_state_lock:
        return stop_generation_requested
src/generation/handler.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ import gradio as gr
7
+ from config import VOICE_MODE_CLONE
8
+ from ..core.state import (
9
+ generation_state_lock,
10
+ get_stop_generation_requested,
11
+ set_stop_generation_requested
12
+ )
13
+ from ..core.authentication import get_huggingface_token
14
+ from ..core.memory import (
15
+ has_temporary_files_pending_cleanup,
16
+ cleanup_expired_temporary_files,
17
+ perform_memory_cleanup,
18
+ memory_cleanup,
19
+ trigger_background_cleanup_check
20
+ )
21
+ from ..tts.manager import text_to_speech_manager
22
+ from ..validation.text import validate_text_input
23
+
24
def check_if_generating():
    """Return the shared generation-in-progress flag.

    NOTE(review): the function-level import copies the current value of
    ``is_currently_generating`` into a local name before the lock is
    taken; the lock only orders this read against writers, it does not
    re-read the flag. Confirm this matches the intended semantics.
    """
    from ..core.state import is_currently_generating
    with generation_state_lock:
        return is_currently_generating
28
+
29
def request_generation_stop():
    """Flag the in-flight generation to stop and grey out the Stop button."""
    set_stop_generation_requested(True)
    # Disable the button so the user cannot issue duplicate stop requests.
    return gr.update(interactive=False)
32
+
33
def perform_speech_generation(
    text_input,
    voice_mode_selection,
    voice_preset_selection,
    voice_clone_audio_file,
    model_variant,
    lsd_decode_steps,
    temperature,
    noise_clamp,
    eos_threshold,
    frames_after_eos,
    enable_custom_frames
):
    """Run one end-to-end TTS generation and return the output WAV path.

    Validates the prompt, enforces single-generation exclusivity via the
    shared state flags, loads (or reuses) the model, resolves a voice
    state (preset or cloned), generates audio, and writes it to a
    temporary file. Returns None when a stop was requested mid-way.

    Raises:
        gr.Error: on invalid input, concurrent generation, or any
            underlying generation failure (surfaced to the UI).
    """
    from ..core import state as global_state
    # Opportunistic housekeeping before the heavy work starts.
    if has_temporary_files_pending_cleanup():
        cleanup_expired_temporary_files()

    perform_memory_cleanup()

    is_valid, validation_result = validate_text_input(text_input)

    if not is_valid:
        # validation_result carries a specific message when available.
        if validation_result:
            raise gr.Error(validation_result)
        raise gr.Error("Please enter valid text to generate speech.")

    if voice_mode_selection == VOICE_MODE_CLONE:
        if not voice_clone_audio_file:
            raise gr.Error("Please upload an audio file for voice cloning.")
        if not get_huggingface_token():
            raise gr.Error("Voice cloning is not configured properly at the moment. Please try again later.")

    # Claim the single generation slot atomically.
    with generation_state_lock:
        if global_state.is_currently_generating:
            raise gr.Error("A generation is already in progress. Please wait.")
        global_state.is_currently_generating = True
        global_state.stop_generation_requested = False

    generated_audio_tensor = None
    cloned_voice_state_tensor = None

    try:
        text_to_speech_manager.load_or_get_model(
            model_variant,
            temperature,
            lsd_decode_steps,
            noise_clamp,
            eos_threshold
        )

        # Stop flag is checked between each expensive stage.
        with generation_state_lock:
            if global_state.stop_generation_requested:
                return None

        if voice_mode_selection == VOICE_MODE_CLONE:
            cloned_voice_state_tensor = text_to_speech_manager.get_voice_state_for_clone(voice_clone_audio_file)
            voice_state = cloned_voice_state_tensor
        else:
            voice_state = text_to_speech_manager.get_voice_state_for_preset(voice_preset_selection)

        with generation_state_lock:
            if global_state.stop_generation_requested:
                return None

        # validation_result holds the sanitized prompt text at this point.
        generated_audio_tensor = text_to_speech_manager.generate_audio(
            validation_result,
            voice_state,
            frames_after_eos,
            enable_custom_frames
        )

        with generation_state_lock:
            if global_state.stop_generation_requested:
                return None

        output_file_path = text_to_speech_manager.save_audio_to_file(generated_audio_tensor)

        return output_file_path

    except gr.Error:
        # Already user-facing; re-raise untouched.
        raise

    except RuntimeError as runtime_error:
        raise gr.Error(str(runtime_error))

    except Exception as generation_error:
        raise gr.Error(f"Speech generation failed: {str(generation_error)}")

    finally:
        # Always release the generation slot and reset the stop flag.
        with generation_state_lock:
            global_state.is_currently_generating = False
            global_state.stop_generation_requested = False

        # Drop tensor references promptly so cleanup can reclaim memory.
        if generated_audio_tensor is not None:
            del generated_audio_tensor
            generated_audio_tensor = None

        if cloned_voice_state_tensor is not None:
            del cloned_voice_state_tensor
            cloned_voice_state_tensor = None

        memory_cleanup()
        trigger_background_cleanup_check()
src/tts/manager.py ADDED
@@ -0,0 +1,231 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ import time
7
+ import tempfile
8
+ import threading
9
+ import torch
10
+ import scipy.io.wavfile
11
+ from pocket_tts import TTSModel
12
+ from config import (
13
+ AVAILABLE_VOICES,
14
+ DEFAULT_VOICE,
15
+ DEFAULT_MODEL_VARIANT,
16
+ DEFAULT_TEMPERATURE,
17
+ DEFAULT_LSD_DECODE_STEPS,
18
+ DEFAULT_EOS_THRESHOLD,
19
+ VOICE_STATE_CACHE_MAXIMUM_SIZE,
20
+ VOICE_STATE_CACHE_CLEANUP_THRESHOLD
21
+ )
22
+ from ..core.state import (
23
+ temporary_files_registry,
24
+ temporary_files_lock,
25
+ set_text_to_speech_manager
26
+ )
27
+ from ..core.memory import (
28
+ force_garbage_collection,
29
+ memory_cleanup,
30
+ perform_memory_cleanup,
31
+ trigger_background_cleanup_check,
32
+ is_memory_usage_approaching_limit
33
+ )
34
+ from ..audio.converter import convert_audio_to_pcm_wav
35
+
36
class TextToSpeechManager:
    """Lock-guarded owner of the Pocket TTS model and a voice-state cache.

    A single ``TTSModel`` is loaded lazily and reloaded whenever the
    requested generation configuration changes. Precomputed voice-prompt
    states for preset voices are cached and evicted in least-recently-used
    order when the cache exceeds its configured bounds or memory pressure
    is detected. ``model_lock`` guards the model and its configuration;
    ``voice_state_cache_lock`` guards the two cache dictionaries.
    """

    def __init__(self):
        # TTSModel instance; None until the first load_or_get_model call.
        self.loaded_model = None
        # Configuration dict the resident model was loaded with.
        self.current_configuration = {}
        # Preset voice name -> precomputed voice-state tensor.
        self.voice_state_cache = {}
        # Preset voice name -> last access time (LRU bookkeeping).
        self.voice_state_cache_access_timestamps = {}
        self.voice_state_cache_lock = threading.Lock()
        self.model_lock = threading.Lock()

    def is_model_loaded(self):
        """Return True when a TTS model is currently resident in memory."""
        with self.model_lock:
            return self.loaded_model is not None

    def unload_model_completely(self):
        """Drop the model and every cached voice state, then reclaim memory."""
        with self.model_lock:
            # Cached states are only meaningful for the model that produced
            # them, so they must go whenever the model goes.
            self.clear_voice_state_cache_completely()

            if self.loaded_model is not None:
                del self.loaded_model
                self.loaded_model = None

            self.current_configuration = {}

            memory_cleanup()

    def load_or_get_model(
        self,
        model_variant,
        temperature,
        lsd_decode_steps,
        noise_clamp,
        eos_threshold
    ):
        """Return a TTSModel matching the requested configuration.

        Reuses the resident model when the normalised configuration is
        unchanged; otherwise discards the old model (and its voice-state
        cache) and loads a fresh one.

        Raises whatever ``TTSModel.load_model`` raises on failure.
        """
        perform_memory_cleanup()

        # Normalise raw UI values, falling back to configured defaults.
        processed_variant = str(model_variant or DEFAULT_MODEL_VARIANT).strip()
        processed_temperature = float(temperature) if temperature is not None else DEFAULT_TEMPERATURE
        processed_lsd_steps = int(lsd_decode_steps) if lsd_decode_steps is not None else DEFAULT_LSD_DECODE_STEPS
        # A falsy or non-positive clamp means "disabled" and is passed as None.
        processed_noise_clamp = float(noise_clamp) if noise_clamp and float(noise_clamp) > 0 else None
        processed_eos_threshold = float(eos_threshold) if eos_threshold is not None else DEFAULT_EOS_THRESHOLD

        requested_configuration = {
            "variant": processed_variant,
            "temp": processed_temperature,
            "lsd_decode_steps": processed_lsd_steps,
            "noise_clamp": processed_noise_clamp,
            "eos_threshold": processed_eos_threshold
        }

        with self.model_lock:
            if self.loaded_model is None or self.current_configuration != requested_configuration:
                if self.loaded_model is not None:
                    self.clear_voice_state_cache_completely()
                    del self.loaded_model
                    self.loaded_model = None
                    memory_cleanup()

                self.loaded_model = TTSModel.load_model(**requested_configuration)
                self.current_configuration = requested_configuration
                # Start from an empty cache for the new model. Also reset the
                # LRU timestamps so they cannot refer to discarded entries
                # (previously only the cache dict was replaced, leaving the
                # timestamp dict stale).
                self.voice_state_cache = {}
                self.voice_state_cache_access_timestamps = {}

            return self.loaded_model

    def clear_voice_state_cache_completely(self):
        """Remove every cached voice state and force a GC pass."""
        with self.voice_state_cache_lock:
            for voice_name in list(self.voice_state_cache.keys()):
                voice_state_tensor = self.voice_state_cache.pop(voice_name, None)
                if voice_state_tensor is not None:
                    del voice_state_tensor

            self.voice_state_cache.clear()
            self.voice_state_cache_access_timestamps.clear()

        force_garbage_collection()

    def _evict_oldest_locked(self, number_of_entries_to_remove):
        """Evict up to *number_of_entries_to_remove* LRU cache entries.

        Caller must already hold ``voice_state_cache_lock``. Always ends
        with a forced garbage collection, even when nothing was removed.
        """
        sorted_voice_names_by_access_time = sorted(
            self.voice_state_cache_access_timestamps.keys(),
            key=lambda voice_name: self.voice_state_cache_access_timestamps[voice_name]
        )

        for voice_name_to_remove in sorted_voice_names_by_access_time[:max(0, number_of_entries_to_remove)]:
            voice_state_tensor = self.voice_state_cache.pop(voice_name_to_remove, None)
            self.voice_state_cache_access_timestamps.pop(voice_name_to_remove, None)

            if voice_state_tensor is not None:
                del voice_state_tensor

        force_garbage_collection()

    def evict_least_recently_used_voice_states(self):
        """Shrink the voice-state cache in LRU order.

        When the cache is within the cleanup threshold (e.g. this was a
        memory-pressure call) half of the entries — at least one — are
        dropped anyway; above the threshold the cache is trimmed back down
        to the threshold.
        """
        with self.voice_state_cache_lock:
            cache_size = len(self.voice_state_cache)

            if 0 < cache_size <= VOICE_STATE_CACHE_CLEANUP_THRESHOLD:
                self._evict_oldest_locked(max(1, cache_size // 2))
                return

            # cache_size == 0 falls through harmlessly (evicts nothing,
            # still forces a GC pass, matching the original behavior).
            self._evict_oldest_locked(cache_size - VOICE_STATE_CACHE_CLEANUP_THRESHOLD)

    def get_voice_state_for_preset(self, voice_name):
        """Return (computing and caching if needed) the state for a preset voice.

        Unknown voice names silently fall back to ``DEFAULT_VOICE``.

        Raises:
            RuntimeError: if no model is loaded.
        """
        validated_voice = voice_name if voice_name in AVAILABLE_VOICES else DEFAULT_VOICE

        with self.voice_state_cache_lock:
            if validated_voice in self.voice_state_cache:
                self.voice_state_cache_access_timestamps[validated_voice] = time.time()
                return self.voice_state_cache[validated_voice]

        # Make room before computing a new (potentially large) state.
        if is_memory_usage_approaching_limit():
            self.evict_least_recently_used_voice_states()

        if len(self.voice_state_cache) >= VOICE_STATE_CACHE_MAXIMUM_SIZE:
            self.evict_least_recently_used_voice_states()

        with self.model_lock:
            if self.loaded_model is None:
                raise RuntimeError("TTS model is not loaded. Please try again.")

            computed_voice_state = self.loaded_model.get_state_for_audio_prompt(
                audio_conditioning=validated_voice,
                truncate=False
            )

        with self.voice_state_cache_lock:
            # Another thread may have cached the same voice meanwhile; keep
            # whichever entry landed first. Returning under the lock also
            # avoids the KeyError the old code risked by re-reading the
            # cache unlocked after a possible concurrent eviction.
            cached_state = self.voice_state_cache.setdefault(validated_voice, computed_voice_state)
            self.voice_state_cache_access_timestamps[validated_voice] = time.time()
            return cached_state

    def get_voice_state_for_clone(self, audio_file_path):
        """Compute a one-off (uncached) voice state from an uploaded clip.

        Raises:
            RuntimeError: if no model is loaded.
        """
        with self.model_lock:
            if self.loaded_model is None:
                raise RuntimeError("TTS model is not loaded. Please try again.")

            # Normalise arbitrary user uploads to PCM WAV before prompting.
            converted_audio_path = convert_audio_to_pcm_wav(audio_file_path)

            return self.loaded_model.get_state_for_audio_prompt(
                audio_conditioning=converted_audio_path,
                truncate=False
            )

    def generate_audio(self, text_content, voice_state, frames_after_eos, enable_custom_frames):
        """Synthesize speech for *text_content* using *voice_state*.

        When *enable_custom_frames* is falsy, ``frames_after_eos`` is passed
        as None so the model uses its default tail length.

        Raises:
            RuntimeError: if no model is loaded.
        """
        with self.model_lock:
            if self.loaded_model is None:
                raise RuntimeError("TTS model is not loaded. Please try again.")

            processed_frames = int(frames_after_eos) if enable_custom_frames else None

            generated_audio = self.loaded_model.generate_audio(
                model_state=voice_state,
                text_to_generate=text_content,
                frames_after_eos=processed_frames,
                copy_state=True  # leave the cached voice state untouched
            )

        force_garbage_collection()

        return generated_audio

    def save_audio_to_file(self, audio_tensor):
        """Write *audio_tensor* to a temporary WAV file and return its path.

        The path is registered in the temporary-files registry so the
        background sweeper can delete it later.

        Raises:
            RuntimeError: if no model is loaded (sample rate unavailable).
        """
        with self.model_lock:
            if self.loaded_model is None:
                raise RuntimeError("TTS model is not loaded. Cannot save audio.")

            audio_sample_rate = self.loaded_model.sample_rate

        audio_numpy_data = audio_tensor.numpy()

        output_file = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
        # Close the handle before writing: scipy reopens by path, and the
        # old code leaked one file descriptor per generation (and an open
        # handle blocks the write on Windows).
        output_file.close()
        scipy.io.wavfile.write(output_file.name, audio_sample_rate, audio_numpy_data)

        with temporary_files_lock:
            temporary_files_registry[output_file.name] = time.time()

        trigger_background_cleanup_check()

        return output_file.name
229
+
230
# Module-level singleton used by the UI layer; registered with the shared
# core state module so other components can retrieve the manager through
# that module rather than importing this one directly.
text_to_speech_manager = TextToSpeechManager()
set_text_to_speech_manager(text_to_speech_manager)
src/ui/handlers.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ import gradio as gr
7
+ from config import VOICE_MODE_PRESET, DEFAULT_VOICE
8
+ from ..validation.text import validate_text_input
9
+
10
def switch_to_generating_state(ui_state):
    """Flip the UI into its "generation in progress" layout.

    Hides the generate button, shows the stop button, hides the clear
    button, and returns a fresh session-state dict with the generating
    flag set. The incoming *ui_state* is accepted for handler-signature
    compatibility but not consulted.
    """
    return (
        gr.update(visible=False),
        gr.update(visible=True, interactive=True),
        gr.update(visible=False),
        {"generating": True},
    )
19
+
20
def switch_to_idle_state(text_content, ui_state):
    """Restore the idle UI after generation finishes or is stopped.

    Re-enables the generate button only when the current text validates,
    shows the clear button only when the text box is non-blank, and
    returns a session-state dict with the generating flag cleared.
    """
    text_is_valid, _ = validate_text_input(text_content)
    show_clear_button = bool(text_content and text_content.strip())

    return (
        gr.update(visible=True, interactive=text_is_valid),
        gr.update(visible=False),
        gr.update(visible=show_clear_button),
        {"generating": False},
    )
34
+
35
def perform_clear_action():
    """Reset every input control to its default value.

    Clears the text box, audio output, and clone-audio upload; hides the
    clear button; and returns the voice controls to preset mode with the
    default voice selected.
    """
    cleared_text = ""
    cleared_audio_output = None
    cleared_clone_audio = None
    hidden_clear_button = gr.update(visible=False)

    return (
        cleared_text,
        cleared_audio_output,
        hidden_clear_button,
        VOICE_MODE_PRESET,
        DEFAULT_VOICE,
        cleared_clone_audio,
    )
44
+
45
def create_example_handler(example_text, example_voice):
    """Build a click callback that fills the form with an example.

    The returned zero-argument closure yields the example text, the
    preset voice mode, and the example's voice name.
    """
    def _apply_example_values():
        return example_text, VOICE_MODE_PRESET, example_voice

    return _apply_example_values
50
+
51
def format_example_button_label(example_text, example_voice, max_text_length=40):
    """Return the label shown on an example button.

    Texts longer than *max_text_length* characters are cut and suffixed
    with an ellipsis; the voice name is prefixed in square brackets.
    """
    if len(example_text) > max_text_length:
        display_text = example_text[:max_text_length] + "..."
    else:
        display_text = example_text

    return f"[{example_voice}] {display_text}"
src/ui/state.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ import gradio as gr
7
+ from config import MAXIMUM_INPUT_LENGTH, VOICE_MODE_CLONE
8
+ from ..validation.text import validate_text_input
9
+
10
def check_generate_button_state(text_content, ui_state):
    """Enable the generate button only when idle with valid text.

    Always disabled while a generation is in progress; otherwise follows
    the text validator's verdict.
    """
    currently_generating = ui_state.get("generating", False)
    if currently_generating:
        return gr.update(interactive=False)

    text_is_valid, _ = validate_text_input(text_content)
    return gr.update(interactive=text_is_valid)
17
+
18
def calculate_character_count_display(text_content):
    """Render the character-counter HTML, shown in red when over the limit."""
    character_count = len(text_content) if text_content else 0

    if character_count > MAXIMUM_INPUT_LENGTH:
        display_color = "var(--error-text-color)"
    else:
        display_color = "var(--body-text-color-subdued)"

    return f"<div style='text-align: right; padding: 4px 0;'><span style='color: {display_color}; font-size: 0.85em;'>{character_count} / {MAXIMUM_INPUT_LENGTH}</span></div>"
28
+
29
def determine_clear_button_visibility(text_content, ui_state):
    """Show the clear button only when idle and the text box is non-blank."""
    if ui_state.get("generating", False):
        return gr.update(visible=False)

    return gr.update(visible=bool(text_content and text_content.strip()))
37
+
38
def update_voice_mode_visibility(voice_mode_value):
    """Toggle between the preset-voice picker and the clone-audio uploader.

    Returns visibility updates as (preset_controls, clone_controls).
    """
    clone_mode_active = voice_mode_value == VOICE_MODE_CLONE
    return (
        gr.update(visible=not clone_mode_active),
        gr.update(visible=clone_mode_active),
    )
src/validation/text.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #
2
+ # SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
3
+ # SPDX-License-Identifier: Apache-2.0
4
+ #
5
+
6
+ from config import MAXIMUM_INPUT_LENGTH
7
+
8
def validate_text_input(text_content):
    """Validate user text for generation.

    Returns a ``(is_valid, payload)`` pair: on success the payload is the
    stripped text; on failure it is an empty string, or an error message
    for the over-length case.
    """
    if not isinstance(text_content, str) or not text_content:
        return False, ""

    cleaned_text = text_content.strip()
    if not cleaned_text:
        return False, ""

    if len(cleaned_text) > MAXIMUM_INPUT_LENGTH:
        return False, f"Input exceeds maximum length of {MAXIMUM_INPUT_LENGTH} characters."

    return True, cleaned_text