File size: 3,085 Bytes
ae7973d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import re
import json
import math
import textwrap
from typing import List, Dict, Any, Optional

import requests
from bs4 import BeautifulSoup
from readability import Document
from ddgs import DDGS


# ---------- Web tools ----------

def web_search(query: str, max_results: int = 5) -> List[Dict[str, str]]:
    """
    Run a DuckDuckGo text search.

    Args:
        query: The search query string.
        max_results: Maximum number of hits to return.

    Returns:
        A list of ``{"title": ..., "href": ..., "body": ...}`` dicts,
        one per search hit (missing fields default to "").
    """
    with DDGS() as ddgs:
        hits = ddgs.text(query, max_results=max_results)
        return [
            {
                "title": hit.get("title", ""),
                "href": hit.get("href", ""),
                "body": hit.get("body", ""),
            }
            for hit in hits
        ]


def fetch_url(url: str, timeout: int = 20) -> str:
    """
    Download a URL and return the response body as text.

    Sends a desktop-browser User-Agent (some sites reject the default
    python-requests UA) and raises ``requests.HTTPError`` on non-2xx
    status codes.

    Args:
        url: The URL to fetch.
        timeout: Request timeout in seconds.

    Returns:
        The decoded response body.
    """
    browser_ua = (
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/120.0 Safari/537.36"
    )
    response = requests.get(
        url,
        headers={"User-Agent": browser_ua},
        timeout=timeout,
    )
    response.raise_for_status()
    return response.text


def extract_main_text(html: str, max_chars: int = 12000) -> str:
    """
    Extract the main readable text from an HTML document.

    Uses readability to isolate the article body; if that fails for any
    reason, falls back to stripping tags from the whole page.

    Args:
        html: Raw HTML markup.
        max_chars: Maximum length of the returned text; longer text is
            truncated and suffixed with an ellipsis.

    Returns:
        Whitespace-normalized plain text.
    """
    try:
        doc = Document(html)
        content_html = doc.summary()
        soup = BeautifulSoup(content_html, "lxml")
        text = soup.get_text(" ", strip=True)
    except Exception:
        # Readability can fail on malformed markup; fall back to plain
        # tag-stripping of the entire page.
        soup = BeautifulSoup(html, "lxml")
        text = soup.get_text(" ", strip=True)

    # Collapse every run of whitespace to a single space.
    text = re.sub(r"\s+", " ", text).strip()
    if len(text) > max_chars:
        # Bug fix: the truncation marker was mojibake ("โ€ฆ" — a UTF-8
        # ellipsis mis-decoded through a Thai codepage). Use the real
        # HORIZONTAL ELLIPSIS character.
        text = text[:max_chars] + "…"
    return text


# ---------- Safe-ish python tool ----------
# This is not a perfect sandbox, but it blocks imports and dangerous builtins.
# For public Spaces, keep it restrictive.

# Names exposed to sandboxed snippets. `json` and `math` are whole
# modules smuggled in through the builtins table so snippets can use
# them without an `import` statement (imports are blocked below).
_ALLOWED_BUILTINS = {
    "abs": abs,
    "min": min,
    "max": max,
    "sum": sum,
    "len": len,
    "range": range,
    "round": round,
    "sorted": sorted,
    "enumerate": enumerate,
    "zip": zip,
    "map": map,
    "filter": filter,
    "list": list,
    "dict": dict,
    "set": set,
    "tuple": tuple,
    "str": str,
    "int": int,
    "float": float,
    "bool": bool,
    "json": json,
    "math": math,
}

def python_tool(code: str) -> str:
    """
    Execute a small python snippet and return its `result` as a string.

    Restrictions (best-effort filtering, NOT a real sandbox):
    - No imports and no dunder access (checked textually).
    - Only the names in ``_ALLOWED_BUILTINS`` are available.
    - The snippet must assign a variable named `result`.

    Args:
        code: The python source to execute.

    Returns:
        The rendered `result` (pretty-printed JSON for dict/list/tuple,
        ``str()`` otherwise), or a human-readable error message.
    """
    if "import " in code or "__" in code:
        return "Blocked: imports and dunder usage are not allowed."

    # Bug fix: run the snippet in a SINGLE namespace. With separate
    # globals/locals dicts, top-level assignments land in locals, so
    # nested scopes (comprehensions, `def`s) — which resolve free names
    # through globals — raised NameError on names the snippet had just
    # defined (e.g. `n = 3; result = [i * n for i in range(n)]`).
    env: Dict[str, Any] = {"__builtins__": _ALLOWED_BUILTINS}

    try:
        exec(code, env)
    except Exception as e:
        return f"Python error: {type(e).__name__}: {e}"

    if "result" not in env:
        return "No `result` variable found. Set `result = ...` to return output."

    out = env["result"]
    if isinstance(out, (dict, list, tuple)):
        return json.dumps(out, ensure_ascii=False, indent=2)
    return str(out)