"""Gradio demo for a fine-tuned Orpheus expressive text-to-speech model.

At import time this module builds a Hugging Face ``text-to-speech`` pipeline
from the files in the current directory and wires it to a Gradio interface
whose handler writes each result to a WAV file under ``outputs/``.
"""
import logging
import os
import uuid
import warnings

import gradio as gr
import numpy as np
import torch
from scipy.io.wavfile import write
from transformers import pipeline

# -----------------------------
# SUPPRESS WARNINGS
# -----------------------------
warnings.filterwarnings("ignore", category=FutureWarning)
logging.getLogger("transformers").setLevel(logging.ERROR)

# -----------------------------
# DEVICE SETUP
# -----------------------------
# First CUDA device when one is available, otherwise -1 (CPU).
device = 0 if torch.cuda.is_available() else -1

# -----------------------------
# PATH TO FINE-TUNED MODEL
# -----------------------------
model_dir = "./"  # path to the fine-tuned Orpheus files inside the Space

# -----------------------------
# OUTPUT SETTINGS
# -----------------------------
# Used only when the pipeline result carries no (or a falsy) "sampling_rate".
DEFAULT_SAMPLING_RATE = 22050
# Directory (relative to the working directory) that receives generated WAVs.
OUTPUT_DIR = "outputs"

# -----------------------------
# LOAD TTS PIPELINE
# -----------------------------
tts_pipe = pipeline(
    task="text-to-speech",
    model=model_dir,
    device=device,
)


# -----------------------------
# INFERENCE FUNCTION
# -----------------------------
def tts_generate(text):
    """Synthesize speech for ``text`` and return the path of the WAV file.

    Args:
        text: Text to speak. ``None`` and whitespace-only strings are
            treated as "nothing to synthesize".

    Returns:
        Path of a newly written 16-bit PCM WAV file inside ``OUTPUT_DIR``,
        or ``None`` when there is no text to synthesize.

    Raises:
        ValueError: If the pipeline result has no ``"audio"`` entry.
    """
    # Guard against None as well as blank input; calling .strip() on None
    # would raise AttributeError.
    if text is None or not text.strip():
        return None

    # Run the TTS model.
    output = tts_pipe(text)
    if "audio" not in output:
        raise ValueError("TTS pipeline did not return audio")

    audio = np.asarray(output["audio"], dtype=np.float32)

    # scipy's WAV writer interprets 2-D data as (samples, channels). Drop
    # singleton axes (e.g. a leading batch axis) and, if the result still
    # looks channels-first, transpose it, so the file is not written as a
    # single frame with thousands of channels.
    audio = np.squeeze(audio)
    if audio.ndim == 0:
        audio = audio.reshape(1)
    elif audio.ndim == 2 and audio.shape[0] < audio.shape[1]:
        audio = audio.T

    # Sanitize before the integer cast to avoid RuntimeWarning: NaN becomes
    # 0 and +/-Inf becomes a very large finite value, which the clip below
    # saturates to +/-1.
    audio = np.nan_to_num(audio)
    audio = np.clip(audio, -1.0, 1.0)

    # Fall back to the default rate when the pipeline reports none; the WAV
    # header needs a plain integer.
    sr = int(output.get("sampling_rate") or DEFAULT_SAMPLING_RATE)

    # Convert float32 in [-1, 1] to 16-bit PCM.
    audio_int16 = (audio * 32767).astype(np.int16)

    # Create the output folder and pick a collision-free file name.
    os.makedirs(OUTPUT_DIR, exist_ok=True)
    out_path = os.path.join(OUTPUT_DIR, f"{uuid.uuid4().hex}.wav")

    # Save the WAV file.
    write(out_path, sr, audio_int16)

    return out_path


# -----------------------------
# SAMPLE TEXTS
# -----------------------------
SAMPLES = [
    "Just end up crashing somewhere. No, because remember last time?",
    # NOTE(review): the original literal contained a raw line break (a syntax
    # error); it is kept here as an explicit "\n". Confirm whether the
    # trailing whitespace is intended or left over from a stripped tag.
    "Hmm… I don't know. This feels like a bad idea. \n",
    "I'm so tired today but I still have so much work to do.",
]

# -----------------------------
# GRADIO INTERFACE
# -----------------------------
demo = gr.Interface(
    fn=tts_generate,
    inputs=gr.Textbox(
        # NOTE(review): the example tag names appear to be missing from this
        # label ("like , ") - presumably stripped upstream; confirm and restore.
        label="Enter text (use expressive tags like , )",
        lines=5,
        placeholder=SAMPLES[0],
    ),
    outputs=gr.Audio(type="filepath", label="Generated Audio"),
    title="Fine-tuned Orpheus-3B Expressive TTS",
    examples=[[s] for s in SAMPLES],
)

# -----------------------------
# CLEAN RUN
# -----------------------------
if __name__ == "__main__":
    # ssr_mode=False reduces asyncio / "Invalid file descriptor" errors.
    demo.launch(ssr_mode=False)