import torch
import numpy as np
import gradio as gr
from transformers import pipeline
import logging
from scipy.io.wavfile import write
import uuid
import os
import warnings

# -----------------------------
# SUPPRESS WARNINGS
# -----------------------------
# Keep the console quiet: only let transformers log errors, and drop
# FutureWarning messages globally.
logging.getLogger("transformers").setLevel(logging.ERROR)
warnings.filterwarnings("ignore", category=FutureWarning)

# -----------------------------
# DEVICE SETUP
# -----------------------------
# Device index handed to the pipeline: 0 when CUDA is available, otherwise -1.
device = -1
if torch.cuda.is_available():
    device = 0

# -----------------------------
# PATH TO FINE-TUNED MODEL
# -----------------------------
# Directory holding the fine-tuned Orpheus model files inside the Space.
model_dir = "./"

# -----------------------------
# LOAD TTS PIPELINE
# -----------------------------
# Built once at import time so every request reuses the same loaded model.
tts_pipe = pipeline("text-to-speech", model=model_dir, device=device)

# -----------------------------
# INFERENCE FUNCTION
# -----------------------------
def tts_generate(text):
    """Synthesize speech for ``text`` and return the path of the WAV file.

    Args:
        text: Prompt to speak. ``None``, an empty string, or whitespace-only
            input is treated as "nothing to synthesize".

    Returns:
        Path of a newly written 16-bit PCM WAV file under ``outputs/``, or
        ``None`` when there is no text to synthesize.

    Raises:
        ValueError: If the pipeline result has no ``"audio"`` entry.
    """
    # The UI/API may hand over None instead of an empty string; calling
    # .strip() on it would raise AttributeError.
    if text is None or not text.strip():
        return None

    # Run the TTS model.
    output = tts_pipe(text)

    if "audio" not in output:
        raise ValueError("TTS pipeline did not return audio")

    audio = np.asarray(output["audio"], dtype=np.float32)

    # The pipeline documents its audio as (nb_channels, audio_length), while
    # scipy's WAV writer expects (n_samples,) or (n_samples, n_channels).
    # Drop singleton batch/channel axes so mono audio becomes 1-D ...
    audio = np.squeeze(audio)
    if audio.ndim == 0:
        # A single sample squeezes down to a scalar; keep it a 1-D signal.
        audio = audio.reshape(1)
    elif audio.ndim == 2 and audio.shape[0] < audio.shape[1]:
        # ... and flip channels-first multi-channel audio to samples-first.
        # Heuristic: the shorter axis is taken to be the channel axis.
        audio = audio.T

    # Sanitize before the integer conversion: NaN becomes silence and
    # infinities saturate at full scale, then everything is limited to
    # [-1, 1] so the int16 scaling below cannot overflow.
    audio = np.nan_to_num(audio, nan=0.0, posinf=1.0, neginf=-1.0)
    audio = np.clip(audio, -1.0, 1.0)

    # Fall back to 22050 Hz when the pipeline reports no sampling rate; the
    # WAV header needs a plain integer.
    sr = int(output.get("sampling_rate") or 22050)

    # Convert float32 in [-1, 1] to 16-bit PCM.
    audio_int16 = (audio * 32767).astype(np.int16)

    # Create the output folder. Each call writes a uniquely named file;
    # nothing in this function removes old files.
    os.makedirs("outputs", exist_ok=True)
    out_path = f"outputs/{uuid.uuid4().hex}.wav"

    # Save the WAV file.
    write(out_path, sr, audio_int16)

    return out_path

# -----------------------------
# SAMPLE TEXTS
# -----------------------------
# Example prompts showing the expressive tags; the first entry is also used as
# the textbox placeholder.
SAMPLES = [
    # <laugh> in the middle of a sentence
    "Just end up crashing somewhere. <laugh> No, because remember last time?",
    # <laugh> and <gasp> combined
    "Hmm… I don't know. <laugh> This feels like a bad idea. <gasp>",
    # <yawn>
    "I'm so tired today <yawn> but I still have so much work to do.",
]

# -----------------------------
# GRADIO INTERFACE
# -----------------------------
# One textbox in, one audio player out. tts_generate returns a file path (or
# None), which is why the audio component is configured with type="filepath".
demo = gr.Interface(
    title="Fine-tuned Orpheus-3B Expressive TTS",
    fn=tts_generate,
    inputs=gr.Textbox(
        placeholder=SAMPLES[0],
        lines=5,
        label="Enter text (use expressive tags like <laugh>, <sigh>)",
    ),
    outputs=gr.Audio(label="Generated Audio", type="filepath"),
    # Interface examples are rows of input values; there is a single input
    # here, so each sample is wrapped in its own one-element list.
    examples=[[sample_text] for sample_text in SAMPLES],
)

# -----------------------------
# CLEAN RUN
# -----------------------------
if __name__ == "__main__":
    # ssr_mode=False is meant to reduce asyncio / "Invalid file descriptor"
    # errors.
    demo.launch(ssr_mode=False)