# Source: tools.py, uploaded by y-hwa19
# Commit message: "add tools.py"
# Commit: ae7973d (verified)
import re
import json
import math
import textwrap
from typing import List, Dict, Any, Optional
import requests
from bs4 import BeautifulSoup
from readability import Document
from ddgs import DDGS
# ---------- Web tools ----------
def web_search(query: str, max_results: int = 5) -> List[Dict[str, str]]:
    """
    Run a DuckDuckGo text search.

    Args:
        query: Search string passed to the DDGS client.
        max_results: Upper bound on hits requested from the client.

    Returns:
        One dict per hit with exactly the keys ``title``, ``href`` and
        ``body``; a key missing from a hit is filled with an empty string.
    """
    wanted_keys = ("title", "href", "body")
    with DDGS() as client:
        hits = client.text(query, max_results=max_results)
        # Consume the hits while the client context is still open.
        return [{key: hit.get(key, "") for key in wanted_keys} for hit in hits]
def fetch_url(url: str, timeout: int = 20) -> str:
    """
    Fetch raw HTML/text from a URL.

    Args:
        url: Address to GET.
        timeout: Timeout in seconds handed to ``requests.get``.

    Returns:
        The decoded response body.

    Raises:
        requests.HTTPError: If the response status is 4xx/5xx
            (via ``raise_for_status``). Other ``requests`` exceptions raised
            by the GET itself propagate unchanged.
    """
    headers = {
        # Browser-like User-Agent sent with the request.
        "User-Agent": (
            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/120.0 Safari/537.36"
        )
    }
    resp = requests.get(url, headers=headers, timeout=timeout)
    resp.raise_for_status()
    # When a text/* response declares no charset, requests falls back to
    # ISO-8859-1, which turns UTF-8 pages into mojibake. Sniff the encoding
    # from the body instead in that case.
    if "charset" not in resp.headers.get("Content-Type", "").lower():
        resp.encoding = resp.apparent_encoding
    return resp.text
def extract_main_text(html: str, max_chars: int = 12000) -> str:
    """
    Extract main readable text from HTML, trimming to max_chars.

    Args:
        html: Raw HTML markup.
        max_chars: Maximum number of characters kept before the ellipsis
            marker is appended.

    Returns:
        Whitespace-normalised text. If it is longer than ``max_chars`` it is
        cut to ``max_chars`` characters and a single ellipsis character is
        appended. Empty or blank input yields ``""``.
    """
    # Nothing to parse; skip the parsers entirely.
    if not html or not html.strip():
        return ""

    try:
        doc = Document(html)
        content_html = doc.summary()
        soup = BeautifulSoup(content_html, "lxml")
        text = soup.get_text(" ", strip=True)
    except Exception:
        # Deliberate best-effort fallback: if the readability pass fails for
        # any reason, take the text of the whole document instead.
        soup = BeautifulSoup(html, "lxml")
        text = soup.get_text(" ", strip=True)

    text = re.sub(r"\s+", " ", text).strip()
    if len(text) > max_chars:
        # U+2026 HORIZONTAL ELLIPSIS, written as an escape so the marker
        # cannot be mis-decoded into mojibake again.
        text = text[:max_chars] + "\u2026"
    return text
# ---------- Safe-ish python tool ----------
# This is NOT a security boundary. It blocks imports, dunder access and
# dangerous builtins, and only exposes read-only views of `json` and `math`,
# but `exec` on untrusted code cannot be made safe by a name allowlist alone.
# For public Spaces, keep it restrictive and run it in an isolated process.


def _read_only_view(module: Any, names: List[str]) -> Any:
    """Return an immutable object exposing only `names` from `module`.

    Handing the real module to a snippet lets it walk into the module's own
    imports (the `json` package imports `codecs`, which imports `sys` and
    `builtins`) and lets it reassign functions the host process relies on.
    """
    # Empty __slots__ means instances have no attribute storage, so any
    # assignment on the view raises AttributeError.
    attrs: Dict[str, Any] = {"__slots__": ()}
    for name in names:
        value = getattr(module, name)
        # staticmethod keeps plain Python functions from being bound to the
        # view instance on attribute access.
        attrs[name] = staticmethod(value) if callable(value) else value
    return type(f"{module.__name__}_view", (), attrs)()


_ALLOWED_BUILTINS: Dict[str, Any] = {
    "abs": abs,
    "min": min,
    "max": max,
    "sum": sum,
    "len": len,
    "range": range,
    "round": round,
    "sorted": sorted,
    "enumerate": enumerate,
    "zip": zip,
    "map": map,
    "filter": filter,
    "list": list,
    "dict": dict,
    "set": set,
    "tuple": tuple,
    "str": str,
    "int": int,
    "float": float,
    "bool": bool,
    # Only the string-based JSON API; nothing that reaches other modules.
    "json": _read_only_view(json, ["dumps", "loads", "JSONDecodeError"]),
    # Every public name of `math` (functions and float constants).
    "math": _read_only_view(
        math, [name for name in dir(math) if not name.startswith("_")]
    ),
}


def python_tool(code: str) -> str:
    """
    Executes small python snippets.
    - No imports
    - Restricted builtins
    - Must set a variable named `result` to return something

    Args:
        code: Source text of the snippet.

    Returns:
        A string in every case:
        - a "Blocked: ..." message if the source contains ``"import "`` or
          ``"__"``;
        - a "Python error: ..." message if the snippet raises;
        - a hint if no ``result`` variable was set;
        - otherwise ``result`` rendered as indented JSON for dict/list/tuple
          values (falling back to ``str`` when the value is not
          JSON-serialisable) or as ``str(result)`` for anything else.
    """
    # Coarse substring filter: also rejects these sequences inside string
    # literals, which is accepted as the price of keeping the check simple.
    if "import " in code or "__" in code:
        return "Blocked: imports and dunder usage are not allowed."

    # A single dict serves as both globals and locals. With two separate
    # dicts the snippet runs with class-body scoping, so functions defined in
    # it cannot see other snippet-level names. The builtins are copied so the
    # shared allowlist is never handed out directly.
    namespace: Dict[str, Any] = {"__builtins__": dict(_ALLOWED_BUILTINS)}
    try:
        # NOTE(security): exec on untrusted input. The filters above reduce
        # the attack surface but do not make this safe on their own.
        exec(code, namespace)
    except Exception as e:
        return f"Python error: {type(e).__name__}: {e}"

    if "result" not in namespace:
        return "No `result` variable found. Set `result = ...` to return output."

    out = namespace["result"]
    if isinstance(out, (dict, list, tuple)):
        try:
            return json.dumps(out, ensure_ascii=False, indent=2)
        except (TypeError, ValueError):
            # Contents json cannot encode (sets, non-string keys, cycles):
            # fall through to the plain string form instead of crashing.
            pass
    return str(out)