"""HF Uptime Monitor.

A small FastAPI service that periodically probes a list of URLs, records the
results in SQLite, tracks open/closed incidents, and serves a JSON status API
plus a static dashboard.  Designed to run on Hugging Face Spaces (persists to
/data when that volume exists, otherwise the working directory).
"""

import asyncio
import json
import os
import sqlite3
import time
from datetime import datetime, timedelta, timezone
from threading import Lock

import httpx
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse, JSONResponse
from pydantic import BaseModel, HttpUrl
from starlette.staticfiles import StaticFiles

# --------------------------
# Config & Paths
# --------------------------
PORT = int(os.getenv("PORT", "7860"))
# Prefer the persistent /data volume (HF Spaces) when present; fall back to cwd.
DATA_DIR = "/data" if os.path.isdir("/data") else "."
DB_PATH = os.path.join(DATA_DIR, "uptime.db")

DEFAULT_SITES = [
    {"url": "https://huggingface.co", "name": "Hugging Face"},
    {"url": "https://www.google.com", "name": "Google"},
    {"url": "https://www.bbc.com", "name": "BBC"},
]

CHECK_INTERVAL_SECONDS = 300  # 5 minutes
HTTP_TIMEOUT_SECONDS = 10

# --------------------------
# DB setup
# --------------------------
os.makedirs(DATA_DIR, exist_ok=True)
# One shared connection; check_same_thread=False because FastAPI handlers and
# the scheduler run on different threads.  All access is serialized via db_lock.
conn = sqlite3.connect(DB_PATH, check_same_thread=False)
conn.row_factory = sqlite3.Row
db_lock = Lock()


def column_exists(table: str, col: str) -> bool:
    """Return True if *col* exists on *table* (used for lightweight migrations)."""
    rows = conn.execute(f"PRAGMA table_info({table})").fetchall()
    return any(r["name"] == col for r in rows)


def init_db() -> None:
    """Create tables if missing, apply migrations, and seed demo sites once.

    Idempotent: safe to call on every startup.
    """
    with db_lock:
        c = conn.cursor()
        c.execute("""
            CREATE TABLE IF NOT EXISTS sites (
                url TEXT PRIMARY KEY,
                name TEXT NOT NULL,
                method TEXT DEFAULT 'GET',
                expected_min INT DEFAULT 200,
                expected_max INT DEFAULT 399
            );
        """)
        c.execute("""
            CREATE TABLE IF NOT EXISTS checks (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                ts TEXT NOT NULL,
                url TEXT NOT NULL,
                ok INT NOT NULL,
                status_code INT,
                ms REAL,
                FOREIGN KEY (url) REFERENCES sites(url)
            );
        """)
        c.execute("""
            CREATE TABLE IF NOT EXISTS incidents (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                url TEXT NOT NULL,
                start_ts TEXT NOT NULL,
                end_ts TEXT,
                FOREIGN KEY (url) REFERENCES sites(url)
            );
        """)
        # --- schema migration: add hf_token if missing ---
        if not column_exists("sites", "hf_token"):
            c.execute("ALTER TABLE sites ADD COLUMN hf_token TEXT")
            conn.commit()
        # Seed a few demo sites if empty (best-effort; ignore per-row failures).
        existing = c.execute("SELECT COUNT(*) AS n FROM sites").fetchone()["n"]
        if existing == 0:
            for s in DEFAULT_SITES:
                try:
                    c.execute(
                        "INSERT OR IGNORE INTO sites(url, name) VALUES (?, ?)",
                        (s["url"], s["name"]),
                    )
                except sqlite3.Error:
                    pass
        conn.commit()


def list_sites(include_secret: bool = False) -> list[dict]:
    """Return all sites as dicts; include hf_token only when *include_secret*."""
    cols = "url, name, method, expected_min, expected_max"
    if include_secret:
        cols += ", hf_token"
    with db_lock:
        rows = conn.execute(f"SELECT {cols} FROM sites ORDER BY url").fetchall()
    return [dict(r) for r in rows]


def get_last_check(url: str) -> dict | None:
    """Return the most recent check row for *url*, or None if never checked."""
    with db_lock:
        r = conn.execute(
            "SELECT ts, ok, status_code, ms FROM checks WHERE url=? ORDER BY id DESC LIMIT 1",
            (url,),
        ).fetchone()
    return dict(r) if r else None


def get_prev_check(url: str) -> dict | None:
    """Return the second-most-recent check row for *url*, or None."""
    with db_lock:
        r = conn.execute(
            "SELECT ts, ok, status_code, ms FROM checks WHERE url=? ORDER BY id DESC LIMIT 1 OFFSET 1",
            (url,),
        ).fetchone()
    return dict(r) if r else None


def record_check(url: str, ok: int, status_code: int | None, ms: float | None, ts_iso: str) -> None:
    """Append one check result row for *url* at timestamp *ts_iso*."""
    with db_lock:
        conn.execute(
            "INSERT INTO checks(ts, url, ok, status_code, ms) VALUES (?,?,?,?,?)",
            (ts_iso, url, ok, status_code, ms),
        )
        conn.commit()


def open_incident_if_needed(url: str, now_iso: str, prev_ok: int | None, current_ok: int) -> None:
    """Open an incident on an up->down transition, close it on down->up.

    A first-ever failed check (prev_ok is None) also opens an incident.
    """
    with db_lock:
        if (prev_ok == 1 or prev_ok is None) and current_ok == 0:
            conn.execute("INSERT INTO incidents(url, start_ts) VALUES (?,?)", (url, now_iso))
            conn.commit()
        elif prev_ok == 0 and current_ok == 1:
            conn.execute(
                "UPDATE incidents SET end_ts=? WHERE url=? AND end_ts IS NULL",
                (now_iso, url),
            )
            conn.commit()


def incidents_for(url: str, limit: int = 50) -> list[dict]:
    """Return up to *limit* most recent incidents for *url* (newest first)."""
    with db_lock:
        rows = conn.execute(
            "SELECT id, url, start_ts, end_ts FROM incidents WHERE url=? ORDER BY id DESC LIMIT ?",
            (url, limit),
        ).fetchall()
    return [dict(r) for r in rows]


def uptime_ratio(url: str, since: datetime) -> float | None:
    """Return the percentage (0-100, 2 decimals) of OK checks since *since*.

    Returns None when there are no checks in the window.  *since* is treated
    as UTC (replace is a no-op for the aware datetimes callers pass in).
    """
    with db_lock:
        rows = conn.execute(
            "SELECT ok FROM checks WHERE url=? AND ts >= ?",
            (url, since.replace(tzinfo=timezone.utc).isoformat()),
        ).fetchall()
    if not rows:
        return None
    total = len(rows)
    ups = sum(1 for r in rows if r["ok"] == 1)
    return round(100.0 * ups / total, 2)


# --------------------------
# HTTP checker
# --------------------------
async def check_one(client: httpx.AsyncClient, site: dict) -> dict:
    """Probe one site, persist the result, and update incident state.

    Returns a summary dict: {url, ok, status_code, ms, ts}.
    Any request error (timeout, DNS, TLS, ...) is recorded as ok=0 with a
    None status code — a failed probe must never crash the sweep.
    """
    url = site["url"]
    expected_min = int(site.get("expected_min", 200))
    expected_max = int(site.get("expected_max", 399))
    # FIX: honor the per-site HTTP method column (was hard-coded to GET).
    method = (site.get("method") or "GET").upper()

    # Build per-site headers (HF Org Spaces need Authorization).
    headers = {}
    token = (site.get("hf_token") or "").strip()
    if token:
        # Tolerate tokens saved with a "Bearer " prefix.
        if token.lower().startswith("bearer "):
            token = token[7:].strip()
        headers["Authorization"] = f"Bearer {token}"

    t0 = time.perf_counter()
    code = None
    ok = 0
    try:
        resp = await client.request(
            method,
            url,
            headers=headers,
            follow_redirects=True,
            timeout=HTTP_TIMEOUT_SECONDS,
        )
        code = resp.status_code
        ok = 1 if (expected_min <= code <= expected_max) else 0
    except Exception:
        ok = 0
    ms = round((time.perf_counter() - t0) * 1000.0, 2)
    now_iso = datetime.now(timezone.utc).isoformat()

    prev = get_last_check(url)
    prev_ok = prev["ok"] if prev else None
    record_check(url, ok, code, ms, now_iso)
    open_incident_if_needed(url, now_iso, prev_ok, ok)
    return {"url": url, "ok": ok, "status_code": code, "ms": ms, "ts": now_iso}


async def run_checks() -> list[dict]:
    """Probe all configured sites concurrently and return their summaries."""
    sites = list_sites(include_secret=True)
    if not sites:
        return []
    limits = httpx.Limits(max_connections=20, max_keepalive_connections=10)
    async with httpx.AsyncClient(limits=limits) as client:
        tasks = [check_one(client, s) for s in sites]
        return await asyncio.gather(*tasks)


# --------------------------
# API & Scheduler
# --------------------------
app = FastAPI(title="HF Uptime Monitor", version="1.1.0")  # bumped
app.add_middleware(
    CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"]
)


class SiteIn(BaseModel):
    """Payload for adding/updating a monitored site."""
    url: HttpUrl
    name: str
    hf_token: str | None = None  # optional Bearer token without prefix


@app.on_event("startup")
async def on_startup():
    """Initialize the DB and start the periodic check scheduler."""
    init_db()
    # FIX: the scheduler job below already runs immediately via next_run_time,
    # so the extra asyncio.create_task(run_checks()) that duplicated the first
    # sweep (double rows in `checks` at boot) has been removed.
    scheduler = AsyncIOScheduler()
    scheduler.add_job(
        run_checks,
        "interval",
        seconds=CHECK_INTERVAL_SECONDS,
        next_run_time=datetime.now(timezone.utc),  # first run right away
    )
    scheduler.start()
    app.state.scheduler = scheduler


@app.on_event("shutdown")
async def on_shutdown():
    """Stop the scheduler without waiting for in-flight jobs."""
    sched = getattr(app.state, "scheduler", None)
    if sched:
        sched.shutdown(wait=False)


# Static files (if you keep external files). If using the inline HTML above,
# just place it at ./static/index.html and keep this as-is.
app.mount("/static", StaticFiles(directory="static"), name="static")


@app.get("/")
def root():
    """Serve the dashboard page."""
    return FileResponse("static/index.html")


@app.get("/api/sites")
def api_sites():
    """Public listing of sites (no secrets)."""
    public = list_sites(include_secret=False)
    return JSONResponse(public)


@app.post("/api/sites")
def api_add_site(site: SiteIn):
    """Add a site, or update name/token if the URL already exists.

    Raises HTTP 400 on a database error.
    """
    url_s = str(site.url)
    name_s = (site.name or "").strip() or url_s
    tok = (site.hf_token or "").strip()
    if tok.lower().startswith("bearer "):
        tok = tok[7:].strip()
    with db_lock:
        try:
            # upsert-ish: try insert; if exists, update token/name
            conn.execute(
                "INSERT OR IGNORE INTO sites(url, name, hf_token) VALUES (?, ?, ?)",
                (url_s, name_s, tok or None),
            )
            conn.execute(
                "UPDATE sites SET name=?, hf_token=? WHERE url=?",
                (name_s, tok or None, url_s),
            )
            conn.commit()
        except sqlite3.Error as e:
            raise HTTPException(400, f"DB error: {e}")
    return {"ok": True}


@app.delete("/api/sites")
def api_delete_site(url: str):
    """Delete a site and all of its check/incident history."""
    with db_lock:
        conn.execute("DELETE FROM sites WHERE url=?", (url,))
        conn.execute("DELETE FROM checks WHERE url=?", (url,))
        conn.execute("DELETE FROM incidents WHERE url=?", (url,))
        conn.commit()
    return {"ok": True}


@app.post("/api/check-now")
async def api_check_now():
    """Run a full sweep immediately and return the raw results."""
    results = await run_checks()
    return results


@app.get("/api/status")
def api_status():
    """Return per-site status: last check, 24h/7d uptime, open-incident flag."""
    sites = list_sites(include_secret=False)  # do NOT leak tokens
    now = datetime.now(timezone.utc)
    since_24h = now - timedelta(hours=24)
    since_7d = now - timedelta(days=7)
    out = []
    for s in sites:
        last = get_last_check(s["url"])
        u24 = uptime_ratio(s["url"], since_24h)
        u7 = uptime_ratio(s["url"], since_7d)
        with db_lock:
            open_inc = conn.execute(
                "SELECT 1 FROM incidents WHERE url=? AND end_ts IS NULL LIMIT 1",
                (s["url"],),
            ).fetchone()
        open_incident = bool(open_inc)
        out.append({
            "url": s["url"],
            "name": s["name"],
            "last": last,
            "uptime24h": u24,
            "uptime7d": u7,
            "open_incident": open_incident,
        })
    return out


@app.get("/api/incidents")
def api_incidents(url: str):
    """Return up to 100 most recent incidents for *url*."""
    return incidents_for(url, limit=100)


@app.get("/api/ping")
def api_ping():
    """Liveness probe."""
    return {"message": "pong"}


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=PORT)