universal_translator / translator.py
joelazo
Fix HuggingFace Spaces deployment
488b15f
"""
Universal Translator - Main UI Module
Gradio-based user interface for the translation application.
"""
import gradio as gr
from dotenv import load_dotenv
from config import (
ModelConfig,
LanguageConfig,
VoiceConfig,
UIConfig,
get_popular_languages_list
)
from translation_service import TranslationService
from voice_handler import (
create_stt_provider,
create_tts_provider,
get_available_stt_providers,
get_available_tts_providers,
get_voices_for_provider
)
# Load variables from a .env file into the process environment.
# override=True lets values from .env replace variables that are already set.
load_dotenv(override=True)
# Module-level service instance; process_translation() calls its
# translate_text() method for every translation request.
translation_service = TranslationService()
# ============================================================================
# Voice Processing Functions
# ============================================================================
def transcribe_audio(audio_path, stt_provider_name):
    """
    Convert a recorded audio file to text with the chosen STT provider.

    Any exception raised while creating the provider or transcribing is
    caught and reported in-band as a bracketed error string, so callers
    never see an exception from this function.

    Args:
        audio_path: Path to the audio file, or None when nothing was recorded
        stt_provider_name: Name of the STT provider to create

    Returns:
        The transcribed text, "" when audio_path is None, or a string
        starting with "[Transcription Error" when transcription failed
    """
    # No recording available: report an empty transcript, not an error.
    if audio_path is None:
        return ""

    try:
        return create_stt_provider(stt_provider_name).transcribe(audio_path)
    except Exception as e:
        # Callers detect failure via the "[Transcription Error" prefix.
        return f"[Transcription Error: {str(e)}]"
def synthesize_speech(text, tts_provider_name, tts_voice):
    """
    Turn text into spoken audio with the chosen TTS provider.

    This is best-effort: a failure is printed to stdout and reported to the
    caller as None instead of raising.

    Args:
        text: Text to synthesize; None, empty or whitespace-only text is skipped
        tts_provider_name: Name of the TTS provider to create
        tts_voice: Voice passed to the provider factory

    Returns:
        Whatever the provider's synthesize() returns (the audio file path),
        or None when there is nothing to speak or synthesis failed
    """
    has_content = bool(text) and bool(text.strip())
    if not has_content:
        return None

    try:
        provider = create_tts_provider(tts_provider_name, voice=tts_voice)
        return provider.synthesize(text)
    except Exception as e:
        print(f"TTS Error: {str(e)}")

    # Reached only when synthesis raised.
    return None
# ============================================================================
# UI Helper Functions
# ============================================================================
def update_voice_dropdown(tts_provider_name):
    """
    Refresh the voice dropdown after the TTS provider selection changes.

    Args:
        tts_provider_name: Name of the newly selected TTS provider

    Returns:
        A Gradio update that replaces the dropdown choices with the
        provider's voices and selects the first one (or None when the
        provider has no voices)
    """
    available_voices = get_voices_for_provider(tts_provider_name)

    if available_voices:
        selected_voice = available_voices[0]
    else:
        selected_voice = None

    return gr.update(choices=available_voices, value=selected_voice)
def toggle_buttons(text):
    """
    Decide whether the Translate and Clear buttons should be clickable.

    Both buttons are enabled only when the textbox holds something other
    than whitespace.

    Args:
        text: Current content of the input textbox (may be None or empty)

    Returns:
        Tuple of Gradio updates for (translate_button, clear_button)
    """
    if text and text.strip():
        is_enabled = True
    else:
        is_enabled = False

    translate_state = gr.update(interactive=is_enabled)
    clear_state = gr.update(interactive=is_enabled)
    return translate_state, clear_state
# ============================================================================
# Translation Handler Functions
# ============================================================================
def process_translation(
    input_text,
    target_language,
    max_tokens,
    temperature,
    enable_tts,
    tts_provider_name,
    tts_voice
):
    """
    Process translation request with text input.

    Args:
        input_text: Text to translate; None, empty or whitespace-only input
            is treated as "no input"
        target_language: Target language for translation
        max_tokens: Maximum tokens for translation
        temperature: Model temperature
        enable_tts: Whether to generate speech output
        tts_provider_name: TTS provider name
        tts_voice: TTS voice

    Returns:
        Tuple of (translated_text, detected_language_info, audio_output).
        For missing input this is ("", "No input provided", None).
        audio_output is None unless speech was requested and generated.
    """
    # Guard against None as well as blank text (same check toggle_buttons
    # uses); calling .strip() directly on None would raise AttributeError.
    if not input_text or not input_text.strip():
        return "", "No input provided", None

    # Translate the text using the shared service instance.
    translated_text, detected_info = translation_service.translate_text(
        input_text,
        target_language,
        max_tokens,
        temperature
    )

    # Generate speech only for a non-empty translation. Text starting with
    # "Translation Error" is skipped so an error message is never read aloud
    # (presumably this prefix is the service's failure marker -- confirm in
    # translation_service).
    audio_output = None
    if enable_tts and translated_text and not translated_text.startswith("Translation Error"):
        audio_output = synthesize_speech(translated_text, tts_provider_name, tts_voice)

    return translated_text, detected_info, audio_output
def process_voice_translation(
    audio,
    target_language,
    stt_provider_name,
    max_tokens,
    temperature,
    enable_tts,
    tts_provider_name,
    tts_voice
):
    """
    Transcribe a recording and run the result through the text translation path.

    Args:
        audio: Audio file from the microphone, or None when there is no recording
        target_language: Target language for translation
        stt_provider_name: STT provider name
        max_tokens: Maximum tokens for translation
        temperature: Model temperature
        enable_tts: Whether to generate speech output
        tts_provider_name: TTS provider name
        tts_voice: TTS voice

    Returns:
        Tuple of (input_text, translated_text, detected_language_info, audio_output).
        Without audio this is ("", "", "No audio input", None); when
        transcription fails the status slot holds "Transcription failed".
    """
    if audio is None:
        return "", "", "No audio input", None

    # Step 1: speech to text. Failure is signalled in-band, either as an
    # empty result or as a string carrying the "[Transcription Error" prefix.
    transcript = transcribe_audio(audio, stt_provider_name)
    transcription_ok = bool(transcript) and not transcript.startswith("[Transcription Error")
    if not transcription_ok:
        return transcript, "", "Transcription failed", None

    # Step 2: reuse the text pipeline for translation and optional speech.
    translation_result = process_translation(
        transcript,
        target_language,
        max_tokens,
        temperature,
        enable_tts,
        tts_provider_name,
        tts_voice
    )
    translated_text, detected_info, audio_output = translation_result

    return transcript, translated_text, detected_info, audio_output
# ============================================================================
# Gradio UI
# ============================================================================
def create_ui():
    """
    Build the Gradio Blocks layout and register its event handlers.

    The page is a header followed by one row with two columns: a wide column
    holding the target language, text/voice inputs and translation outputs,
    and a narrow column holding translation and voice settings.

    Returns:
        The configured gr.Blocks instance (not launched here)
    """
    with gr.Blocks(title=UIConfig.APP_TITLE) as ui:
        # ---- Header ----
        gr.Markdown(f"# {ModelConfig.TITLE}")
        gr.Markdown(f"Powered by **{ModelConfig.MODEL_NAME}** - Supporting 1000+ Languages 🌍")
        gr.Markdown(UIConfig.APP_DESCRIPTION)

        with gr.Row():
            # ---- Wide column: inputs and outputs ----
            with gr.Column(scale=3):
                target_language = gr.Dropdown(
                    label="Target Language",
                    info="Select the language to translate to",
                    choices=get_popular_languages_list(),
                    value=LanguageConfig.DEFAULT_TARGET_LANGUAGE,
                )

                # Text input
                gr.Markdown("### Text Input")
                with gr.Row():
                    input_text = gr.Textbox(
                        label="Enter text to translate",
                        placeholder="Type or paste text in any language...",
                        lines=UIConfig.INPUT_TEXT_LINES,
                    )
                with gr.Row():
                    # Both buttons start disabled; toggle_buttons enables them
                    # once the textbox contains non-blank text.
                    translate_button = gr.Button(
                        "Translate", variant="primary", scale=2, interactive=False
                    )
                    clear_button = gr.Button("Clear", scale=1, interactive=False)

                # Voice input
                gr.Markdown("### Voice Input")
                with gr.Row():
                    with gr.Column(scale=4):
                        voice_input = gr.Audio(
                            label="Record Audio (Auto-translates on stop)",
                            sources=["microphone"],
                            type="filepath",
                        )

                # Translation output
                gr.Markdown("### Translation Output")
                detected_language = gr.Textbox(
                    label="Detected Source Language",
                    interactive=False,
                )
                translated_text = gr.Textbox(
                    label="Translated Text",
                    lines=UIConfig.OUTPUT_TEXT_LINES,
                    interactive=False,
                )
                voice_output = gr.Audio(
                    label="Translated Audio",
                    autoplay=True,
                    visible=True,
                )

            # ---- Narrow column: settings ----
            with gr.Column(scale=1):
                gr.Markdown("### Translation Settings")
                max_tokens = gr.Slider(
                    label="Max Tokens",
                    info="Maximum length of translation",
                    minimum=UIConfig.MAX_TOKENS_MIN,
                    maximum=UIConfig.MAX_TOKENS_MAX,
                    step=UIConfig.MAX_TOKENS_STEP,
                    value=ModelConfig.DEFAULT_MAX_TOKENS,
                )
                temperature = gr.Slider(
                    label="Temperature",
                    info="Lower = more literal, Higher = more creative",
                    minimum=UIConfig.TEMPERATURE_MIN,
                    maximum=UIConfig.TEMPERATURE_MAX,
                    step=UIConfig.TEMPERATURE_STEP,
                    value=ModelConfig.DEFAULT_TEMPERATURE,
                )

                gr.Markdown("### Voice Settings")
                stt_provider = gr.Dropdown(
                    label="Speech-to-Text Provider",
                    info="For voice input",
                    choices=get_available_stt_providers(),
                    value=VoiceConfig.DEFAULT_STT_PROVIDER,
                )
                enable_voice_output = gr.Checkbox(
                    label="Enable Voice Output",
                    info="Generate audio for translations",
                    value=VoiceConfig.DEFAULT_VOICE_OUTPUT_ENABLED,
                )
                tts_provider = gr.Dropdown(
                    label="Text-to-Speech Provider",
                    info="For audio output",
                    choices=get_available_tts_providers(),
                    value=VoiceConfig.DEFAULT_TTS_PROVIDER,
                )
                tts_voice = gr.Dropdown(
                    label="TTS Voice",
                    info="Select voice style",
                    choices=get_voices_for_provider(VoiceConfig.DEFAULT_TTS_PROVIDER),
                    value=VoiceConfig.DEFAULT_TTS_VOICE,
                )

        # ---- Event wiring ----
        # Settings shared by the text and voice handlers, in the order both
        # handler signatures expect them.
        generation_inputs = [max_tokens, temperature]
        speech_inputs = [enable_voice_output, tts_provider, tts_voice]

        # Text translation on button click.
        translate_button.click(
            process_translation,
            inputs=[input_text, target_language, *generation_inputs, *speech_inputs],
            outputs=[translated_text, detected_language, voice_output],
        )

        # Enable/disable the buttons whenever the textbox content changes.
        input_text.change(
            toggle_buttons,
            inputs=[input_text],
            outputs=[translate_button, clear_button],
        )

        # Voice translation runs whenever the audio value changes; the
        # transcript is written back into the text input.
        voice_input.change(
            process_voice_translation,
            inputs=[
                voice_input,
                target_language,
                stt_provider,
                *generation_inputs,
                *speech_inputs,
            ],
            outputs=[input_text, translated_text, detected_language, voice_output],
        )

        # Clear resets the three text fields and both audio components.
        # NOTE(review): resetting voice_input presumably re-fires its change
        # handler with None -- confirm the resulting status text is acceptable.
        clear_button.click(
            lambda: ("", "", "", None, None),
            outputs=[input_text, translated_text, detected_language, voice_output, voice_input],
        )

        # Repopulate the voice list when a different TTS provider is picked.
        tts_provider.change(
            update_voice_dropdown,
            inputs=[tts_provider],
            outputs=[tts_voice],
        )

    return ui
# ============================================================================
# Main Entry Point
# ============================================================================
# Custom theme based on Gradio's Soft theme; hues and font come from UIConfig.
# It is not applied here -- main() passes it to demo.launch().
theme = gr.themes.Soft(
    primary_hue=UIConfig.THEME_PRIMARY_HUE,
    secondary_hue=UIConfig.THEME_SECONDARY_HUE,
    neutral_hue=UIConfig.THEME_NEUTRAL_HUE,
    font=UIConfig.THEME_FONT
)
# Create the demo at module level (required for HuggingFace Spaces).
# This runs at import time, so importing this module builds the whole UI.
demo = create_ui()
def main():
    """Launch the module-level Gradio app using the settings in UIConfig."""
    launch_options = {
        "share": UIConfig.SHARE_LINK,
        "inbrowser": UIConfig.OPEN_IN_BROWSER,
        "theme": theme,
        "css": UIConfig.CUSTOM_CSS,
    }
    demo.launch(**launch_options)


if __name__ == "__main__":
    main()