|
|
import torch |
|
|
import numpy as np |
|
|
import gradio as gr |
|
|
from transformers import pipeline |
|
|
import logging |
|
|
from scipy.io.wavfile import write |
|
|
import uuid |
|
|
import os |
|
|
import warnings |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# --- Quiet noisy library output ---------------------------------------------
# Ignore FutureWarning notices and only let ERROR-level (or higher) records
# from the "transformers" logger through.
warnings.filterwarnings(action="ignore", category=FutureWarning)
_transformers_logger = logging.getLogger("transformers")
_transformers_logger.setLevel(logging.ERROR)

# --- Device selection ---------------------------------------------------------
# Device index handed to the pipeline: 0 when CUDA is available, otherwise -1
# (presumably "first GPU" vs. "CPU" per the transformers pipeline convention).
if torch.cuda.is_available():
    device = 0
else:
    device = -1

# --- Model location -----------------------------------------------------------
# The model is loaded from the current working directory.
model_dir = "./"

# --- Text-to-speech pipeline --------------------------------------------------
# Built once at import time and reused by every call to tts_generate().
tts_pipe = pipeline("text-to-speech", model=model_dir, device=device)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def tts_generate(text):
    """Synthesize speech for *text* and return the path of the written WAV.

    Args:
        text: Text to speak; may contain expressive tags such as ``<laugh>``.
            ``None`` and empty / whitespace-only strings mean "nothing to
            synthesize".

    Returns:
        Path of a newly written 16-bit PCM WAV file inside ``outputs/``, or
        ``None`` when there is no text to synthesize.

    Raises:
        ValueError: If the pipeline result has no ``"audio"`` entry.
    """
    # Guard against None as well as blank input before calling .strip().
    if text is None or not text.strip():
        return None

    output = tts_pipe(text)
    if "audio" not in output:
        raise ValueError("TTS pipeline did not return audio")

    audio = np.asarray(output["audio"], dtype=np.float32)

    # scipy's WAV writer expects (n_samples,) or (n_samples, n_channels).
    # Drop singleton axes (e.g. a leading batch/channel axis of size 1) so a
    # (1, N) result is not written as one frame with N channels.
    audio = np.atleast_1d(np.squeeze(audio))
    # NOTE(review): assumes the shorter axis of a 2-D result is the channel
    # axis (channels-first pipeline output) — confirm for the model in use.
    if audio.ndim == 2 and audio.shape[0] < audio.shape[1]:
        audio = audio.T

    # Replace NaN/inf with finite values and keep samples in [-1, 1] so the
    # int16 conversion below cannot wrap around.
    audio = np.nan_to_num(audio)
    audio = np.clip(audio, -1.0, 1.0)

    # Fall back to 22050 Hz when the pipeline reports no sampling rate; the
    # WAV header needs a plain integer.
    sr = int(output.get("sampling_rate") or 22050)

    # Scale float samples to 16-bit PCM.
    audio_int16 = (audio * 32767).astype(np.int16)

    # A random file name per request keeps generations from overwriting
    # each other.
    out_dir = "outputs"
    os.makedirs(out_dir, exist_ok=True)
    out_path = os.path.join(out_dir, f"{uuid.uuid4().hex}.wav")

    write(out_path, sr, audio_int16)
    return out_path
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Example prompts: shown as clickable examples in the UI; the first one also
# serves as the textbox placeholder.
SAMPLES = [
    "Just end up crashing somewhere. <laugh> No, because remember last time?",
    "Hmm… I don't know. <laugh> This feels like a bad idea. <gasp>",
    "I'm so tired today <yawn> but I still have so much work to do.",
]

# Input component: multi-line text box for the text to synthesize.
_text_input = gr.Textbox(
    label="Enter text (use expressive tags like <laugh>, <sigh>)",
    lines=5,
    placeholder=SAMPLES[0],
)

# Output component: audio player fed with the file path returned by
# tts_generate().
_audio_output = gr.Audio(type="filepath", label="Generated Audio")

# Gradio interface wiring the text box to tts_generate() and the audio player.
demo = gr.Interface(
    fn=tts_generate,
    inputs=_text_input,
    outputs=_audio_output,
    title="Fine-tuned Orpheus-3B Expressive TTS",
    examples=[[sample] for sample in SAMPLES],
)

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch(ssr_mode=False)
|
|
|