import os

import requests
import gradio as gr

from safety import safety_filter, refusal_reply

# --- Configuration (all overridable via environment variables) ---
HF_API_URL = os.getenv(
    "HF_API_URL",
    "https://api-inference.huggingface.co/models/tiiuae/Falcon3-7B-Instruct",
)
HF_TOKEN = os.getenv("HF_TOKEN")  # store in Secrets (not Variables)
MAX_NEW = int(os.getenv("MAX_NEW", "256"))  # max new tokens to generate
TEMP = float(os.getenv("TEMP", "0.7"))      # sampling temperature
TOP_P = float(os.getenv("TOP_P", "0.9"))    # nucleus-sampling cutoff

# Only send an Authorization header when a token is actually configured.
HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}

SYSTEM_MSG = (
    "You are Ved AGI — open, safe, and human-aligned. "
    "Be helpful, honest, and concise. Follow the safety policy: avoid harmful, illegal, or unethical advice; "
    "decline with a brief rationale and offer safer alternatives."
)


def build_prompt(history, user_input, max_turns=5):
    """Assemble a plain-text chat transcript for the model.

    Parameters
    ----------
    history : list[list[str, str]] | None
        Prior (user, assistant) turn pairs; only the trailing ``max_turns``
        are included to bound prompt length.
    user_input : str
        The new user message.
    max_turns : int
        Number of most-recent history turns to keep.

    Returns
    -------
    str
        Newline-joined transcript ending with a dangling ``Assistant:`` cue
        for the model to complete.
    """
    turns = history[-max_turns:] if history else []
    parts = [f"System: {SYSTEM_MSG}"]
    for u, a in turns:
        parts.append(f"User: {u}")
        parts.append(f"Assistant: {a}")
    parts.append(f"User: {user_input}")
    parts.append("Assistant:")
    return "\n".join(parts)


def call_hf_api(prompt):
    """POST ``prompt`` to the HF Inference API and return the completion text.

    Returns the model completion on success, or a human-readable
    ``⚠️``-prefixed string when the API responds with an ``error`` payload.

    Raises
    ------
    requests.HTTPError
        On non-2xx responses (via ``raise_for_status``).
    requests.RequestException
        On timeouts / connection failures.
    """
    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": MAX_NEW,
            "temperature": TEMP,
            "top_p": TOP_P,
            # BUGFIX: the text-generation task defaults to return_full_text=True,
            # which echoes the entire prompt back inside `generated_text` and
            # made the chat repeat the whole transcript. Ask for completion only.
            "return_full_text": False,
            "stop": ["\nUser:", "\nSystem:"],
        },
    }
    r = requests.post(HF_API_URL, headers=HEADERS, json=payload, timeout=120)
    r.raise_for_status()
    data = r.json()

    if isinstance(data, list) and data and "generated_text" in data[0]:
        out = data[0]["generated_text"]
    elif isinstance(data, dict) and "generated_text" in data:
        out = data["generated_text"]
    elif isinstance(data, dict) and "error" in data:
        return f"⚠️ API error: {data['error']}"
    else:
        return str(data)

    # Defensive: some backends ignore `return_full_text` and still prepend
    # the prompt — strip it so only the new completion is returned.
    if out.startswith(prompt):
        out = out[len(prompt):]
    return out


def vedagi_chat(user_input, history):
    """Gradio submit handler: filter input, query the model, filter output.

    Returns the updated chat history as a list of ``[user, assistant]``
    pairs. Never raises — API failures are surfaced as an in-chat message
    so the UI keeps working.
    """
    history = history or []  # BUGFIX: Chatbot state can be None before first use

    safe_in, blocked_in, reason_in = safety_filter(user_input, mode="input")
    if blocked_in:
        return history + [[user_input, refusal_reply(reason_in)]]

    prompt = build_prompt(history, safe_in)
    try:
        out = call_hf_api(prompt)
    except Exception as e:
        # Surface network/API failures in-chat instead of crashing the handler.
        out = f"⚠️ API request failed: {e}"

    # Truncate at any leaked next-turn marker (in case the backend ignored
    # `stop`), then trim a single leading role-tag echo.
    for marker in ("\nUser:", "\nSystem:"):
        out = out.split(marker, 1)[0]
    out = out.strip()
    for tag in ("Assistant:", "System:", "User:"):
        if out.startswith(tag):
            out = out[len(tag):].strip()

    safe_out, blocked_out, reason_out = safety_filter(out, mode="output")
    if blocked_out:
        safe_out = refusal_reply(reason_out)
    return history + [[user_input, safe_out]]


with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🌸 Ved AGI — v1 (HF Inference API)\nFalcon via serverless API + safety wrapper.")
    chat = gr.Chatbot(height=430)
    box = gr.Textbox(placeholder="Ask Ved AGI…", autofocus=True, label="Message")
    clear = gr.Button("Clear")
    box.submit(vedagi_chat, [box, chat], [chat])
    clear.click(lambda: None, None, [chat])

if __name__ == "__main__":
    # Bind to all interfaces; hosting platforms inject PORT (default 7860).
    demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))