Rajan Sharma committed on
Commit 327f806 · verified · 1 Parent(s): 15441ed

Update app.py

Files changed (1)
  1. app.py +97 -26
app.py CHANGED
@@ -1,11 +1,19 @@
-import os, requests, gradio as gr
+import os
+import time
+import requests
+import gradio as gr
 from safety import safety_filter, refusal_reply
 
-HF_API_URL = os.getenv("HF_API_URL", "https://api-inference.huggingface.co/models/tiiuae/Falcon3-7B-Instruct")
-HF_TOKEN = os.getenv("HF_TOKEN")  # store in Secrets (not Variables)
-MAX_NEW = int(os.getenv("MAX_NEW", "256"))
-TEMP = float(os.getenv("TEMP", "0.7"))
-TOP_P = float(os.getenv("TOP_P", "0.9"))
+# =========================
+# Config via env variables
+# =========================
+FALCON_URL = os.getenv("HF_API_URL_FALCON", "").strip()    # e.g., https://api-inference.huggingface.co/models/tiiuae/falcon-7b-instruct
+PRIMARY_URL = os.getenv("HF_API_URL_PRIMARY", "").strip()  # e.g., https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.3
+HF_TOKEN = os.getenv("HF_TOKEN", "").strip()
+
+MAX_NEW = int(os.getenv("MAX_NEW", "256"))
+TEMP = float(os.getenv("TEMP", "0.7"))
+TOP_P = float(os.getenv("TOP_P", "0.9"))
 
 HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
 
@@ -15,6 +23,12 @@ SYSTEM_MSG = (
     "decline with a brief rationale and offer safer alternatives."
 )
 
+# Ordered preference: try Falcon first, then primary (Mistral, Llama, etc.)
+API_ORDER = [u for u in [FALCON_URL, PRIMARY_URL] if u]
+
+# ==============
+# Prompt builder
+# ==============
 def build_prompt(history, user_input, max_turns=5):
     turns = history[-max_turns:] if history else []
     parts = [f"System: {SYSTEM_MSG}"]
@@ -25,7 +39,28 @@ def build_prompt(history, user_input, max_turns=5):
     parts.append("Assistant:")
     return "\n".join(parts)
 
-def call_hf_api(prompt):
+# ======================
+# Inference API routines
+# ======================
+def parse_api_response(data):
+    """
+    The HF Inference API can return:
+      - [{'generated_text': '...'}]
+      - {'generated_text': '...'}
+      - {'error': '...'}
+      - other shapes depending on backend
+    """
+    if isinstance(data, list) and data and isinstance(data[0], dict):
+        if "generated_text" in data[0]:
+            return data[0]["generated_text"]
+    if isinstance(data, dict):
+        if "generated_text" in data:
+            return data["generated_text"]
+        if "error" in data:
+            return f"⚠️ API error: {data['error']}"
+    return str(data)
+
+def call_api_once(url, prompt, timeout=120):
     payload = {
         "inputs": prompt,
         "parameters": {
@@ -35,48 +70,84 @@ def call_hf_api(prompt):
             "stop": ["\nUser:", "\nSystem:"]
         }
     }
-    r = requests.post(HF_API_URL, headers=HEADERS, json=payload, timeout=120)
+    r = requests.post(url, headers=HEADERS, json=payload, timeout=timeout)
+    if r.status_code == 503:
+        # model is loading; surface a friendly message for logs/UI
+        return None, f"🕙 Model cold start on {url}. Try again in a few seconds."
     r.raise_for_status()
-    data = r.json()
-    if isinstance(data, list) and data and "generated_text" in data[0]:
-        return data[0]["generated_text"]
-    if isinstance(data, dict) and "generated_text" in data:
-        return data["generated_text"]
-    if isinstance(data, dict) and "error" in data:
-        return f"⚠️ API error: {data['error']}"
-    return str(data)
+    return parse_api_response(r.json()), None
+
+def query_with_fallback(prompt):
+    """
+    Try endpoints in order. Returns (text, backend_label).
+    Raises last error if all fail.
+    """
+    last_err = None
+    for url in API_ORDER:
+        label = "Falcon" if url == FALCON_URL else "Primary"
+        try:
+            data, cold = call_api_once(url, prompt)
+            if cold:
+                # brief wait & retry same URL once
+                time.sleep(2)
+                data, cold = call_api_once(url, prompt)
+            if data:
+                return data, label
+        except Exception as e:
+            last_err = e
+    raise RuntimeError(f"All API endpoints failed. Last error: {last_err}")
 
-def vedagi_chat(user_input, history):
+# =====================
+# Chat + Safety wrapper
+# =====================
+def vedagi_chat(user_input, history, status):
+    # Pre-filter (RealSafe-style input)
     safe_in, blocked_in, reason_in = safety_filter(user_input, mode="input")
     if blocked_in:
-        return history + [[user_input, refusal_reply(reason_in)]]
+        return history + [[user_input, refusal_reply(reason_in)]], status
 
+    # Build prompt and query API (with fallback)
     prompt = build_prompt(history, safe_in)
    try:
-        out = call_hf_api(prompt)
+        out, backend = query_with_fallback(prompt)
    except Exception as e:
-        out = f"⚠️ API request failed: {e}"
 
-    # trim echoes
+    # Tidy echoes
     for tag in ("Assistant:", "System:", "User:"):
-        if out.startswith(tag):
+        if isinstance(out, str) and out.startswith(tag):
             out = out[len(tag):].strip()
 
+    # Post-filter (RealSafe-style output)
     safe_out, blocked_out, reason_out = safety_filter(out, mode="output")
     if blocked_out:
         safe_out = refusal_reply(reason_out)
 
-    return history + [[user_input, safe_out]]
+    # Update banner with active backend
+    status = f"**Backend:** {backend} • **MAX_NEW:** {MAX_NEW} • **TEMP:** {TEMP} • **TOP_P:** {TOP_P}"
+    return history + [[user_input, safe_out]], status
 
+# =====
+# UI
+# =====
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown("# 🌸 Ved AGI — v1 (HF Inference API)\nFalcon via serverless API + safety wrapper.")
+    gr.Markdown("# 🌸 Ved AGI — v1 (HF Inference API)")
+    status = gr.Markdown("**Backend:** (probing…) • **MAX_NEW:** "
+                         f"{MAX_NEW} • **TEMP:** {TEMP} • **TOP_P:** {TOP_P}")
     chat = gr.Chatbot(height=430)
     box = gr.Textbox(placeholder="Ask Ved AGI…", autofocus=True, label="Message")
     clear = gr.Button("Clear")
 
-    box.submit(vedagi_chat, [box, chat], [chat])
-    clear.click(lambda: None, None, [chat])
+    def _respond(msg, hist, stat):
+        return vedagi_chat(msg, hist, stat)
+
+    box.submit(_respond, [box, chat, status], [chat, status])
+    clear.click(lambda: ([], "Ready."), None, [chat, status])
 
 if __name__ == "__main__":
+    # Ensure we have at least one URL
+    if not API_ORDER:
+        raise RuntimeError("No API endpoints configured. Set HF_API_URL_FALCON and/or HF_API_URL_PRIMARY in Variables.")
     demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))
 
+
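Reviewer note: a minimal, standalone smoke test of the endpoint setup this commit wires up. It mirrors the request that `call_api_once` sends; the prompt string and the parameter values below are illustrative assumptions (the hunk elides the lines that plug `MAX_NEW`/`TEMP`/`TOP_P` into the payload), and it presumes `HF_API_URL_FALCON` and `HF_TOKEN` are already set (token in Secrets, URLs in Variables).

```python
# smoke_test.py — hypothetical helper, not part of this commit.
# Mirrors the payload shape built in call_api_once; parameter values
# here are stand-ins for MAX_NEW / TEMP / TOP_P.
import os
import requests

url = os.environ["HF_API_URL_FALCON"]                            # assumed set in Variables
headers = {"Authorization": f"Bearer {os.environ['HF_TOKEN']}"}  # assumed set in Secrets

payload = {
    "inputs": "System: You are a helpful assistant.\nUser: Say hello.\nAssistant:",
    "parameters": {
        "max_new_tokens": 64,
        "temperature": 0.7,
        "top_p": 0.9,
        "stop": ["\nUser:", "\nSystem:"],
    },
}

r = requests.post(url, headers=headers, json=payload, timeout=120)
print(r.status_code)  # 503 is the cold-start case that query_with_fallback retries once
print(r.json())       # typically [{"generated_text": "..."}]
```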
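Also worth a quick offline check: `parse_api_response` normalizes the response shapes its docstring lists. The sketch below assumes `app.py` imports cleanly (gradio and the local `safety` module installed); the Blocks UI is constructed at import time but only launched under `__main__`.

```python
# shape_check.py — hypothetical, offline; no HTTP involved.
from app import parse_api_response

assert parse_api_response([{"generated_text": "hi"}]) == "hi"  # list-of-dicts shape
assert parse_api_response({"generated_text": "hi"}) == "hi"    # bare dict shape
assert parse_api_response({"error": "loading"}).startswith("⚠️ API error:")
assert parse_api_response(12345) == "12345"                    # unknown shapes fall back to str()
print("parse_api_response handles all documented shapes")
```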