import os
import time

import requests
import gradio as gr

from safety import safety_filter, refusal_reply

# =========================
# Config via env variables
# =========================
# e.g., https://api-inference.huggingface.co/models/tiiuae/falcon-7b-instruct
FALCON_URL = os.getenv("HF_API_URL_FALCON", "").strip()
# e.g., https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.3
PRIMARY_URL = os.getenv("HF_API_URL_PRIMARY", "").strip()
HF_TOKEN = os.getenv("HF_TOKEN", "").strip()
MAX_NEW = int(os.getenv("MAX_NEW", "256"))
TEMP = float(os.getenv("TEMP", "0.7"))
TOP_P = float(os.getenv("TOP_P", "0.9"))

HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}

SYSTEM_MSG = (
    "You are Ved AGI — open, safe, and human-aligned. "
    "Be helpful, honest, and concise. Follow the safety policy: "
    "avoid harmful, illegal, or unethical advice; decline with a "
    "brief rationale and offer safer alternatives."
)

# Ordered preference: try Falcon first, then the primary model (Mistral, Llama, etc.)
API_ORDER = [u for u in (FALCON_URL, PRIMARY_URL) if u]


# ==============
# Prompt builder
# ==============
def build_prompt(history, user_input, max_turns=5):
    """Flatten the last `max_turns` chat turns into a plain-text transcript."""
    turns = history[-max_turns:] if history else []
    parts = [f"System: {SYSTEM_MSG}"]
    for u, a in turns:
        parts.append(f"User: {u}")
        parts.append(f"Assistant: {a}")
    parts.append(f"User: {user_input}")
    parts.append("Assistant:")
    return "\n".join(parts)


# ======================
# Inference API routines
# ======================
def parse_api_response(data):
    """
    The HF Inference API can return:
      - [{'generated_text': '...'}]
      - {'generated_text': '...'}
      - {'error': '...'}
      - other shapes depending on the backend
    """
    if isinstance(data, list) and data and isinstance(data[0], dict):
        if "generated_text" in data[0]:
            return data[0]["generated_text"]
    if isinstance(data, dict):
        if "generated_text" in data:
            return data["generated_text"]
        if "error" in data:
            return f"⚠️ API error: {data['error']}"
    return str(data)


def call_api_once(url, prompt, timeout=120):
    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": MAX_NEW,
            "temperature": TEMP,
            "top_p": TOP_P,
            # don't echo the prompt back in generated_text (backends differ
            # in their default for this flag)
            "return_full_text": False,
            # cut generation before the model invents the next turn
            "stop": ["\nUser:", "\nSystem:"],
        },
    }
    r = requests.post(url, headers=HEADERS, json=payload, timeout=timeout)
    if r.status_code == 503:
        # model is loading; surface a friendly message for logs/UI
        return None, f"🕙 Model cold start on {url}. Try again in a few seconds."
    r.raise_for_status()
    return parse_api_response(r.json()), None


def query_with_fallback(prompt):
    """
    Try endpoints in order. Returns (text, backend_label).
    Raises if all endpoints fail.
    """
    last_err = None
    for url in API_ORDER:
        label = "Falcon" if url == FALCON_URL else "Primary"
        try:
            data, cold = call_api_once(url, prompt)
            if cold:
                # brief wait, then retry the same URL once
                time.sleep(2)
                data, cold = call_api_once(url, prompt)
            if data:
                return data, label
        except Exception as e:
            last_err = e
    raise RuntimeError(f"All API endpoints failed. Last error: {last_err}")
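
# Illustrative only: a tiny offline smoke test of the transcript format that
# build_prompt() produces (and that the "stop" sequences above guard). Not
# wired into the app; run `python -c "import app; app.demo_prompt()"` if this
# file is saved as app.py (the filename is an assumption).
def demo_prompt():
    example_history = [["hello", "Hi! How can I help?"]]  # hypothetical turn
    print(build_prompt(example_history, "What's the capital of France?"))
    # Expected shape:
    #   System: You are Ved AGI ...
    #   User: hello
    #   Assistant: Hi! How can I help?
    #   User: What's the capital of France?
    #   Assistant: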
# =====================
# Chat + Safety wrapper
# =====================
def vedagi_chat(user_input, history, status):
    # Pre-filter (RealSafe-style input check)
    safe_in, blocked_in, reason_in = safety_filter(user_input, mode="input")
    if blocked_in:
        return history + [[user_input, refusal_reply(reason_in)]], status

    # Build the prompt and query the API (with fallback)
    prompt = build_prompt(history, safe_in)
    try:
        out, backend = query_with_fallback(prompt)
    except Exception as e:
        out, backend = f"⚠️ API request failed: {e}", "Offline"

    # Tidy role-tag echoes at the start of the reply
    for tag in ("Assistant:", "System:", "User:"):
        if isinstance(out, str) and out.startswith(tag):
            out = out[len(tag):].strip()

    # Post-filter (RealSafe-style output check)
    safe_out, blocked_out, reason_out = safety_filter(out, mode="output")
    if blocked_out:
        safe_out = refusal_reply(reason_out)

    # Update the banner with the active backend
    status = (
        f"**Backend:** {backend} • **MAX_NEW:** {MAX_NEW} "
        f"• **TEMP:** {TEMP} • **TOP_P:** {TOP_P}"
    )
    return history + [[user_input, safe_out]], status


# =====
# UI
# =====
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🌸 Ved AGI — v1 (HF Inference API)")
    status = gr.Markdown(
        "**Backend:** (probing…) • **MAX_NEW:** "
        f"{MAX_NEW} • **TEMP:** {TEMP} • **TOP_P:** {TOP_P}"
    )
    chat = gr.Chatbot(height=430)
    box = gr.Textbox(placeholder="Ask Ved AGI…", autofocus=True, label="Message")
    clear = gr.Button("Clear")

    def _respond(msg, hist, stat):
        return vedagi_chat(msg, hist, stat)

    box.submit(_respond, [box, chat, status], [chat, status])
    clear.click(lambda: ([], "Ready."), None, [chat, status])


if __name__ == "__main__":
    # Ensure at least one endpoint is configured
    if not API_ORDER:
        raise RuntimeError(
            "No API endpoints configured. Set HF_API_URL_FALCON and/or "
            "HF_API_URL_PRIMARY in Variables."
        )
    demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))
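
# ---------------------------------------------------------------------------
# Appendix: `safety` (imported at the top) is a separate module that is not
# part of this file. Below is a minimal placeholder sketch, assuming a simple
# keyword screen; the real RealSafe-style filter may differ. The signatures
# match how this script calls the module: safety_filter() returns a
# (text, blocked, reason) triple and refusal_reply() returns a string. Save
# as safety.py only if you need a stand-in:
#
#     BLOCKED_PHRASES = ("example blocked phrase",)  # hypothetical list
#
#     def safety_filter(text, mode="input"):
#         """Return (possibly sanitized text, blocked?, reason)."""
#         lowered = (text or "").lower()
#         for phrase in BLOCKED_PHRASES:
#             if phrase in lowered:
#                 return text, True, f"blocked phrase in {mode}"
#         return text, False, ""
#
#     def refusal_reply(reason):
#         return (f"I can't help with that ({reason}). "
#                 "I'm happy to suggest a safer alternative.")
# ---------------------------------------------------------------------------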