import os
import time

import requests
import gradio as gr

from safety import safety_filter, refusal_reply

# =========================
# Config via env variables
# =========================
# e.g., https://api-inference.huggingface.co/models/tiiuae/falcon-7b-instruct
FALCON_URL = os.getenv("HF_API_URL_FALCON", "").strip()
# e.g., https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.3
PRIMARY_URL = os.getenv("HF_API_URL_PRIMARY", "").strip()
HF_TOKEN = os.getenv("HF_TOKEN", "").strip()
MAX_NEW = int(os.getenv("MAX_NEW", "256"))
TEMP = float(os.getenv("TEMP", "0.7"))
TOP_P = float(os.getenv("TOP_P", "0.9"))

HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}

SYSTEM_MSG = (
    "You are Ved AGI — open, safe, and human-aligned. "
    "Be helpful, honest, and concise. Follow the safety policy: "
    "avoid harmful, illegal, or unethical advice; decline with a "
    "brief rationale and offer safer alternatives."
)

# Ordered preference: try Falcon first, then the primary model (Mistral, Llama, etc.)
API_ORDER = [u for u in (FALCON_URL, PRIMARY_URL) if u]


# ==============
# Prompt builder
# ==============
def build_prompt(history, user_input, max_turns=5):
    """Flatten the last `max_turns` chat turns into a plain-text transcript."""
    turns = history[-max_turns:] if history else []
    parts = [f"System: {SYSTEM_MSG}"]
    for u, a in turns:
        parts.append(f"User: {u}")
        parts.append(f"Assistant: {a}")
    parts.append(f"User: {user_input}")
    parts.append("Assistant:")
    return "\n".join(parts)


# ======================
# Inference API routines
# ======================
def parse_api_response(data):
    """
    The HF Inference API can return:
      - [{'generated_text': '...'}]
      - {'generated_text': '...'}
      - {'error': '...'}
      - other shapes depending on the backend
    """
    if isinstance(data, list) and data and isinstance(data[0], dict):
        if "generated_text" in data[0]:
            return data[0]["generated_text"]
    if isinstance(data, dict):
        if "generated_text" in data:
            return data["generated_text"]
        if "error" in data:
            return f"⚠️ API error: {data['error']}"
    return str(data)


def call_api_once(url, prompt, timeout=120):
    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": MAX_NEW,
            "temperature": TEMP,
            "top_p": TOP_P,
            # don't echo the prompt back in generated_text (backends differ
            # in their default for this flag)
            "return_full_text": False,
            # cut generation before the model invents the next turn
            "stop": ["\nUser:", "\nSystem:"],
        },
    }
    r = requests.post(url, headers=HEADERS, json=payload, timeout=timeout)
    if r.status_code == 503:
        # model is loading; surface a friendly message for logs/UI
        return None, f"🕙 Model cold start on {url}. Try again in a few seconds."
    r.raise_for_status()
    return parse_api_response(r.json()), None


def query_with_fallback(prompt):
    """
    Try endpoints in order. Returns (text, backend_label).
    Raises if all endpoints fail.
    """
    last_err = None
    for url in API_ORDER:
        label = "Falcon" if url == FALCON_URL else "Primary"
        try:
            data, cold = call_api_once(url, prompt)
            if cold:
                # brief wait, then retry the same URL once
                time.sleep(2)
                data, cold = call_api_once(url, prompt)
            if data:
                return data, label
        except Exception as e:
            last_err = e
    raise RuntimeError(f"All API endpoints failed. Last error: {last_err}")
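
# Illustrative only: a tiny offline smoke test of the transcript format that
# build_prompt() produces (and that the "stop" sequences above guard). Not
# wired into the app; run `python -c "import app; app.demo_prompt()"` if this
# file is saved as app.py (the filename is an assumption).
def demo_prompt():
    example_history = [["hello", "Hi! How can I help?"]]  # hypothetical turn
    print(build_prompt(example_history, "What's the capital of France?"))
    # Expected shape:
    #   System: You are Ved AGI ...
    #   User: hello
    #   Assistant: Hi! How can I help?
    #   User: What's the capital of France?
    #   Assistant: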
# =====================
# Chat + Safety wrapper
# =====================
def vedagi_chat(user_input, history, status):
    # Pre-filter (RealSafe-style input check)
    safe_in, blocked_in, reason_in = safety_filter(user_input, mode="input")
    if blocked_in:
        return history + [[user_input, refusal_reply(reason_in)]], status

    # Build the prompt and query the API (with fallback)
    prompt = build_prompt(history, safe_in)
    try:
        out, backend = query_with_fallback(prompt)
    except Exception as e:
        out, backend = f"⚠️ API request failed: {e}", "Offline"

    # Tidy role-tag echoes at the start of the reply
    for tag in ("Assistant:", "System:", "User:"):
        if isinstance(out, str) and out.startswith(tag):
            out = out[len(tag):].strip()

    # Post-filter (RealSafe-style output check)
    safe_out, blocked_out, reason_out = safety_filter(out, mode="output")
    if blocked_out:
        safe_out = refusal_reply(reason_out)

    # Update the banner with the active backend
    status = (
        f"**Backend:** {backend} • **MAX_NEW:** {MAX_NEW} "
        f"• **TEMP:** {TEMP} • **TOP_P:** {TOP_P}"
    )
    return history + [[user_input, safe_out]], status


# =====
# UI
# =====
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🌸 Ved AGI — v1 (HF Inference API)")
    status = gr.Markdown(
        "**Backend:** (probing…) • **MAX_NEW:** "
        f"{MAX_NEW} • **TEMP:** {TEMP} • **TOP_P:** {TOP_P}"
    )
    chat = gr.Chatbot(height=430)
    box = gr.Textbox(placeholder="Ask Ved AGI…", autofocus=True, label="Message")
    clear = gr.Button("Clear")

    def _respond(msg, hist, stat):
        return vedagi_chat(msg, hist, stat)

    box.submit(_respond, [box, chat, status], [chat, status])
    clear.click(lambda: ([], "Ready."), None, [chat, status])


if __name__ == "__main__":
    # Ensure at least one endpoint is configured
    if not API_ORDER:
        raise RuntimeError(
            "No API endpoints configured. Set HF_API_URL_FALCON and/or "
            "HF_API_URL_PRIMARY in Variables."
        )
    demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))
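
# ---------------------------------------------------------------------------
# Appendix: `safety` (imported at the top) is a separate module that is not
# part of this file. Below is a minimal placeholder sketch, assuming a simple
# keyword screen; the real RealSafe-style filter may differ. The signatures
# match how this script calls the module: safety_filter() returns a
# (text, blocked, reason) triple and refusal_reply() returns a string. Save
# as safety.py only if you need a stand-in:
#
#     BLOCKED_PHRASES = ("example blocked phrase",)  # hypothetical list
#
#     def safety_filter(text, mode="input"):
#         """Return (possibly sanitized text, blocked?, reason)."""
#         lowered = (text or "").lower()
#         for phrase in BLOCKED_PHRASES:
#             if phrase in lowered:
#                 return text, True, f"blocked phrase in {mode}"
#         return text, False, ""
#
#     def refusal_reply(reason):
#         return (f"I can't help with that ({reason}). "
#                 "I'm happy to suggest a safer alternative.")
# ---------------------------------------------------------------------------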