Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import torch | |
| from qwen_tts import Qwen3TTSModel | |
# Checkpoint identifier handed to ``from_pretrained`` (looks like a Hugging Face
# Hub repo id -- confirm if the model is ever moved).
MODEL_ID = "Bateesa/QWEN-TTS_Luganda_Base"

# The model is loaded once, at import time, and the resulting ``tts`` object is
# shared by every request handler defined in this file.
tts = Qwen3TTSModel.from_pretrained(MODEL_ID, device_map="auto", dtype=torch.bfloat16)
def plain_tts(text, language):
    """Synthesize ``text`` without supplying a reference recording.

    The request goes through ``tts.generate_voice_clone`` with no reference
    audio and ``x_vector_only_mode=True``.

    Args:
        text: The sentence(s) to speak.
        language: Language selector forwarded to the model, e.g. ``"english"``
            or ``"auto"`` (the latter is what the UI suggests for Luganda with
            the fine-tuned weights).

    Returns:
        A ``(sample_rate, waveform)`` pair built from the first generated
        waveform.

    NOTE(review): this relies on the voice-clone entry point accepting
    ``ref_audio=None`` -- confirm against the qwen_tts API.
    """
    request = {
        "text": text,
        "language": language,
        "ref_audio": None,
        "x_vector_only_mode": True,
    }
    waveforms, sample_rate = tts.generate_voice_clone(**request)
    first_waveform = waveforms[0]
    return sample_rate, first_waveform
def clone_tts(ref_audio, ref_text, text, language, xvec_only):
    """Speak ``text`` in the voice captured by a reference recording.

    Args:
        ref_audio: Value of the reference ``gr.Audio(type="numpy")`` component:
            a ``(sample_rate, samples)`` tuple, or ``None`` when the user has
            not provided any audio.
        ref_text: Transcript of the reference recording.
        text: The sentence(s) to synthesize.
        language: Language selector forwarded to the model (often ``"auto"``).
        xvec_only: Forwarded as ``x_vector_only_mode``. ``False`` => stronger
            ICL, better likeness; ``True`` => timbre only.

    Returns:
        ``(sample_rate, waveform)`` for the output audio component, or ``None``
        when no reference audio was supplied.
    """
    if ref_audio is None:
        # This handler feeds exactly one Audio output, so "nothing to show" has
        # to be a single None. A (None, None) tuple would be interpreted as an
        # audio value with no sample rate and no data.
        return None

    # Local import: the file does not import numpy at module level.
    import numpy as np

    # Gradio delivers (sample_rate, samples); the model call below is
    # documented in this file as taking (np.array, sr), so unpack and re-pack
    # in the other order.
    ref_sample_rate, ref_samples = ref_audio
    ref_samples = np.asarray(ref_samples)

    # Gradio usually hands over integer PCM; rescale it to float32 in [-1, 1].
    if np.issubdtype(ref_samples.dtype, np.integer):
        info = np.iinfo(ref_samples.dtype)
        if info.min < 0:
            ref_samples = ref_samples.astype(np.float32) / float(-info.min)
        else:
            # Unsigned PCM is centred on the middle of its range.
            half_range = (info.max + 1) / 2.0
            ref_samples = (ref_samples.astype(np.float32) - half_range) / half_range
    else:
        ref_samples = ref_samples.astype(np.float32)

    # Multi-channel input arrives as (samples, channels); mix down to mono.
    if ref_samples.ndim > 1:
        ref_samples = ref_samples.mean(axis=-1)

    wavs, sr = tts.generate_voice_clone(
        text=text,
        language=language,  # often "auto"
        ref_audio=(ref_samples, int(ref_sample_rate)),
        ref_text=ref_text,
        x_vector_only_mode=xvec_only,
    )
    return sr, wavs[0]
# Two-tab Gradio front end: one tab per request handler defined in this file.
with gr.Blocks() as demo:
    gr.Markdown("## Luganda / English TTS & Voice Cloning (Qwen3-TTS Fine-tuned)")

    # --- Tab 1: text in, audio out ------------------------------------------
    with gr.Tab("Plain TTS"):
        txt = gr.Textbox(label="Text")
        lang = gr.Dropdown(
            label="Language",
            choices=["auto", "english"],
            value="auto",
        )
        btn = gr.Button("Generate")
        audio_out = gr.Audio(type="numpy", label="Output")
        btn.click(fn=plain_tts, inputs=[txt, lang], outputs=audio_out)

    # --- Tab 2: reference recording + transcript steer the output voice -----
    with gr.Tab("Voice Cloning"):
        ref_audio = gr.Audio(type="numpy", label="Reference audio")
        ref_text = gr.Textbox(label="Reference transcript (same language as ref audio)")
        clone_text = gr.Textbox(label="Text to speak")
        clone_lang = gr.Dropdown(
            label="Language",
            choices=["auto", "english"],
            value="auto",
        )
        xvec_only = gr.Checkbox(label="x-vector only (no ICL, timbre only)", value=False)
        clone_btn = gr.Button("Clone Voice")
        clone_out = gr.Audio(type="numpy", label="Cloned output")

        # Input order must match the parameter order of clone_tts.
        clone_inputs = [ref_audio, ref_text, clone_text, clone_lang, xvec_only]
        clone_btn.click(fn=clone_tts, inputs=clone_inputs, outputs=clone_out)

# Start the web server for the interface built above.
demo.launch()