Upload folder using huggingface_hub
app.py CHANGED
@@ -16,11 +16,12 @@ import numpy as np
 import scipy.signal as signal
 from scipy.ndimage import gaussian_filter1d
 from dataclasses import dataclass
-from typing import Optional, Tuple, List, Dict
+from typing import Optional, Tuple, List, Dict, Generator
 import json
 import requests
 import re
 import urllib.parse
+import os
 
 # ================== CONFIG ==================
 SAMPLE_RATE = 48000
@@ -30,28 +31,118 @@ OLLAMA_URL = "http://localhost:11434"
 OLLAMA_MODELS = ["llama3.2", "phi4:latest", "qwen2.5:3b"]  # Priority order
 
 # HuggingFace Inference API (for cloud deployment)
-# Token loaded from environment variable or HuggingFace Spaces secrets
-import os
 HF_API_TOKEN = os.environ.get("HF_TOKEN", "")
 HF_API_MODELS = [
     "mistralai/Mistral-7B-Instruct-v0.3",
     "google/flan-t5-xxl",
-    "facebook/opt-1.3b",
+    "facebook/opt-1.3b",
 ]
 
 
-# ==================
-"""
+# ================== CUSTOM CSS ==================
+CUSTOM_CSS = """
+@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&family=JetBrains+Mono:wght@400;500&display=swap');
+
+* {
+    font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif !important;
+}
+
+.gradio-container {
+    max-width: 1400px !important;
+    margin: 0 auto !important;
+    padding: 20px !important;
+}
+
+h1, h2, h3, h4, h5, h6 {
+    font-family: 'Inter', sans-serif !important;
+    font-weight: 600 !important;
+    letter-spacing: -0.02em !important;
+}
+
+.header-banner {
+    background: linear-gradient(135deg, #0f2e1f 0%, #1a4d2e 50%, #0f2e1f 100%) !important;
+    padding: 2.5rem !important;
+    border-radius: 20px !important;
+    margin-bottom: 1.5rem !important;
+    box-shadow: 0 10px 40px rgba(0,0,0,0.3) !important;
+}
+
+.header-banner h1 {
+    color: #4ade80 !important;
+    font-size: 3rem !important;
+    font-weight: 700 !important;
+    margin: 0 0 0.5rem 0 !important;
+    text-shadow: 0 2px 10px rgba(74, 222, 128, 0.3) !important;
+}
+
+.header-banner p {
+    margin: 0.3rem 0 !important;
+    line-height: 1.5 !important;
+}
+
+.llm-status {
+    background: #1e293b !important;
+    padding: 12px 20px !important;
+    border-radius: 12px !important;
+    margin-bottom: 1rem !important;
+    font-weight: 500 !important;
+    font-size: 1rem !important;
+}
+
+.tab-nav button {
+    font-size: 1rem !important;
+    font-weight: 500 !important;
+    padding: 12px 24px !important;
+}
+
+.primary-btn {
+    background: linear-gradient(135deg, #4ade80 0%, #22c55e 100%) !important;
+    color: #0f172a !important;
+    font-weight: 600 !important;
+    font-size: 1.1rem !important;
+    padding: 16px 32px !important;
+    border-radius: 12px !important;
+    border: none !important;
+    cursor: pointer !important;
+    transition: all 0.2s ease !important;
+    box-shadow: 0 4px 15px rgba(74, 222, 128, 0.3) !important;
+}
+
+.primary-btn:hover {
+    transform: translateY(-2px) !important;
+    box-shadow: 0 6px 20px rgba(74, 222, 128, 0.4) !important;
+}
+
+.result-box {
+    background: #1e293b !important;
+    border-radius: 16px !important;
+    padding: 24px !important;
+    min-height: 400px !important;
+}
+
+textarea, input[type="text"] {
+    font-family: 'Inter', sans-serif !important;
+    font-size: 1rem !important;
+    border-radius: 10px !important;
+}
+
+code, pre {
+    font-family: 'JetBrains Mono', monospace !important;
+}
+
+.footer {
+    text-align: center !important;
+    padding: 1.5rem !important;
+    margin-top: 2rem !important;
+    border-top: 1px solid #334155 !important;
+}
 """
 
+
+# ================== DYNAMIC IMAGE SEARCH ==================
+
 def get_wikipedia_image(bird_name: str, scientific_name: str = "") -> str:
-    """
-    Dynamically fetch bird image from Wikipedia.
-    No hardcoding - searches based on LLM output.
-    """
-    # Try scientific name first (more accurate)
+    """Dynamically fetch bird image from Wikipedia. No hardcoding."""
     search_terms = []
     if scientific_name:
         search_terms.append(scientific_name.replace(" ", "_"))
@@ -60,7 +151,6 @@ def get_wikipedia_image(bird_name: str, scientific_name: str = "") -> str:
 
     for term in search_terms:
         try:
-            # Wikipedia API to get page image
             wiki_url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{urllib.parse.quote(term)}"
             resp = requests.get(wiki_url, timeout=5, headers={"User-Agent": "BirdSense/1.0"})
 
@@ -68,51 +158,40 @@ def get_wikipedia_image(bird_name: str, scientific_name: str = "") -> str:
                 data = resp.json()
                 if "thumbnail" in data and "source" in data["thumbnail"]:
                     img_url = data["thumbnail"]["source"]
-                    # Get higher resolution
                     img_url = img_url.replace("/220px-", "/400px-").replace("/320px-", "/400px-")
                     return img_url
                 elif "originalimage" in data and "source" in data["originalimage"]:
                     return data["originalimage"]["source"]
-        except:
+        except:
             continue
 
-    # Fallback:
+    # Fallback: Wikimedia Commons search
     try:
-        commons_url =
+        commons_url = "https://commons.wikimedia.org/w/api.php"
         params = {
-            "action": "query",
-            "format": "json",
-            "list": "search",
-            "srsearch": f"{bird_name} bird",
-            "srnamespace": "6",  # File namespace
-            "srlimit": "1"
+            "action": "query", "format": "json",
+            "list": "search", "srsearch": f"{bird_name} bird",
+            "srnamespace": "6", "srlimit": "1"
         }
         resp = requests.get(commons_url, params=params, timeout=5)
         if resp.status_code == 200:
             data = resp.json()
             if data.get("query", {}).get("search"):
                 file_title = data["query"]["search"][0]["title"]
-                # Get actual image URL
-                file_url = f"https://commons.wikimedia.org/w/api.php"
                 file_params = {
-                    "action": "query",
-                    "format": "json",
-                    "titles": file_title,
-                    "prop": "imageinfo",
-                    "iiprop": "url",
-                    "iiurlwidth": "400"
+                    "action": "query", "format": "json",
+                    "titles": file_title, "prop": "imageinfo",
+                    "iiprop": "url", "iiurlwidth": "400"
                 }
-                file_resp = requests.get(file_url, params=file_params, timeout=5)
+                file_resp = requests.get(commons_url, params=file_params, timeout=5)
                 if file_resp.status_code == 200:
-                    file_data = file_resp.json()
-                    pages = file_data.get("query", {}).get("pages", {})
+                    pages = file_resp.json().get("query", {}).get("pages", {})
                     for page in pages.values():
                         if "imageinfo" in page:
                             return page["imageinfo"][0].get("thumburl", page["imageinfo"][0].get("url", ""))
     except:
         pass
 
-    # Final fallback - generic bird silhouette
     return "https://upload.wikimedia.org/wikipedia/commons/thumb/5/5a/Bird_icon.svg/200px-Bird_icon.svg.png"
 
 
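
Reviewer note on the hunk above: the Wikipedia REST summary endpoint can be exercised standalone. A minimal sketch of the same request pattern, assuming only `requests` (the example title and the 400px width rewrite mirror the code above):

```python
import urllib.parse

import requests

def wiki_thumbnail(title: str) -> str:
    """Return a ~400px thumbnail URL from the Wikipedia REST summary endpoint."""
    url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{urllib.parse.quote(title)}"
    resp = requests.get(url, timeout=5, headers={"User-Agent": "BirdSense/1.0"})
    resp.raise_for_status()
    src = resp.json().get("thumbnail", {}).get("source", "")
    # The summary thumbnail is small; rewriting the width segment requests a larger rendition.
    return src.replace("/220px-", "/400px-").replace("/320px-", "/400px-")

print(wiki_thumbnail("Eudynamys_scolopaceus"))  # Asian koel, as an example
```
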
@@ -174,12 +253,7 @@ class SAMAudioProcessor:
         f, t, Zxx = signal.stft(audio, self.sr, nperseg=2048)
         magnitude = np.abs(Zxx)
 
-        bands = [
-            ("low_freq", 500, 2000),
-            ("mid_freq", 2000, 5000),
-            ("high_freq", 5000, 10000),
-        ]
-
+        bands = [("low", 500, 2000), ("mid", 2000, 5000), ("high", 5000, 10000)]
         detected = []
         for band_name, low, high in bands:
             band_idx = (f >= low) & (f <= high)
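
The consolidated `bands` line above drives a simple per-band energy check over the STFT. A self-contained sketch of that technique on a synthetic 3 kHz tone; the 0.1 threshold is illustrative, not the app's tuned value:

```python
import numpy as np
import scipy.signal as signal

sr = 48000
t = np.linspace(0, 2.0, 2 * sr, endpoint=False)
audio = np.sin(2 * np.pi * 3000 * t)  # pure 3 kHz tone, lands in the "mid" band

f, _, Zxx = signal.stft(audio, sr, nperseg=2048)
magnitude = np.abs(Zxx)

for band_name, low, high in [("low", 500, 2000), ("mid", 2000, 5000), ("high", 5000, 10000)]:
    band_idx = (f >= low) & (f <= high)  # STFT rows inside the band
    peak = magnitude[band_idx].max()     # strongest component in the band
    if peak > 0.1:                       # illustrative detection threshold
        print(f"{band_name}: peak magnitude {peak:.2f}")
```
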
@@ -210,8 +284,40 @@ def get_available_ollama_model() -> Optional[str]:
     return None
 
 
+def call_ollama_stream(prompt: str, system: str = None) -> Generator[str, None, None]:
+    """Call local Ollama LLM with streaming."""
+    model = get_available_ollama_model()
+    if not model:
+        yield "⚠️ Ollama not available"
+        return
+
+    payload = {
+        "model": model,
+        "prompt": prompt,
+        "stream": True,
+        "options": {"temperature": 0.2, "num_predict": 2000}
+    }
+    if system:
+        payload["system"] = system
+
+    try:
+        with requests.post(f"{OLLAMA_URL}/api/generate", json=payload, stream=True, timeout=180) as r:
+            full_response = ""
+            for line in r.iter_lines():
+                if line:
+                    try:
+                        data = json.loads(line)
+                        chunk = data.get("response", "")
+                        full_response += chunk
+                        yield full_response
+                    except:
+                        continue
+    except Exception as e:
+        yield f"Error: {e}"
+
+
 def call_ollama(prompt: str, system: str = None) -> Optional[str]:
-    """Call local Ollama LLM
+    """Call local Ollama LLM (non-streaming)."""
     model = get_available_ollama_model()
     if not model:
         return None
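
`call_ollama_stream` above works because Ollama's `/api/generate` emits one JSON object per line, each carrying a `response` text fragment and a final `done` flag. A minimal standalone consumer, assuming a local Ollama with `llama3.2` pulled:

```python
import json

import requests

payload = {"model": "llama3.2", "prompt": "Name one Indian bird.", "stream": True}
with requests.post("http://localhost:11434/api/generate", json=payload, stream=True, timeout=180) as r:
    text = ""
    for line in r.iter_lines():
        if not line:
            continue
        data = json.loads(line)           # one JSON object per line (NDJSON)
        text += data.get("response", "")  # accumulate the fragment
        if data.get("done"):
            break
print(text)
```
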
@@ -237,8 +343,6 @@ def call_ollama(prompt: str, system: str = None) -> Optional[str]:
 def call_hf_inference(prompt: str, system: str = None) -> Optional[str]:
     """Call HuggingFace Inference API."""
     full_prompt = f"{system}\n\n{prompt}" if system else prompt
-
-    # Truncate prompt if too long
     if len(full_prompt) > 4000:
         full_prompt = full_prompt[:4000]
 
@@ -251,11 +355,7 @@ def call_hf_inference(prompt: str, system: str = None) -> Optional[str]:
 
     payload = {
         "inputs": full_prompt,
-        "parameters": {
-            "max_new_tokens": 1000,
-            "temperature": 0.3,
-            "return_full_text": False
-        }
+        "parameters": {"max_new_tokens": 1000, "temperature": 0.3, "return_full_text": False}
     }
 
     resp = requests.post(url, headers=headers, json=payload, timeout=90)
@@ -267,29 +367,17 @@ def call_hf_inference(prompt: str, system: str = None) -> Optional[str]:
                 if text and len(text) > 20:
                     return text
             elif isinstance(result, dict):
-                text = result.get("generated_text", "")
-                if text:
-                    return text
-            elif resp.status_code == 503:
-                continue  # Model loading
-            elif resp.status_code == 401:
-                continue  # Auth required, try next
-
+                return result.get("generated_text", "")
         except Exception as e:
-            print(f"HF API error for {model}: {e}")
             continue
-
     return None
 
 
 def call_llm(prompt: str, system: str = None) -> Optional[str]:
     """Call LLM - Ollama first, HuggingFace fallback."""
-    # Try Ollama first
     result = call_ollama(prompt, system)
     if result:
         return result
-
-    # Fallback to HuggingFace
     return call_hf_inference(prompt, system)
 
 
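
`call_llm` is a two-level fallback chain (local Ollama, then the HF Inference API). The pattern generalizes to any priority-ordered backend list; a sketch with hypothetical backend callables:

```python
from typing import Callable, List, Optional

def first_success(backends: List[Callable[[str], Optional[str]]], prompt: str) -> Optional[str]:
    """Try each backend in priority order; return the first non-empty result."""
    for backend in backends:
        try:
            result = backend(prompt)
            if result:
                return result
        except Exception:
            continue  # a failing backend simply falls through to the next
    return None

# Mirrors call_llm's behaviour (backend names here are hypothetical):
# answer = first_success([ollama_backend, hf_backend], "Identify this call...")
```
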
@@ -318,35 +406,28 @@ class AudioFeatures:
     sam_metadata: dict
 
     def to_prompt(self) -> str:
-        freq_desc = "very low (<500Hz, large bird
+        freq_desc = "very low (<500Hz, large bird)" if self.peak_frequency < 500 else \
                     "low (500-1500Hz, koel/coucal)" if self.peak_frequency < 1500 else \
-                    "medium (1500-4000Hz,
-                    "high (4000-7000Hz, warbler
-                    "very high (>7000Hz, alarm call
+                    "medium (1500-4000Hz, songbird)" if self.peak_frequency < 4000 else \
+                    "high (4000-7000Hz, warbler)" if self.peak_frequency < 7000 else \
+                    "very high (>7000Hz, alarm call)"
 
-        return f"""AUDIO ANALYSIS (
-- Syllable count: {self.num_syllables}
-- Signal quality: SNR {self.snr_db:.1f}dB {"(excellent)" if self.snr_db > 20 else "(good)" if self.snr_db > 10 else "(noisy)"}
-SAM-Audio separation: {self.sam_metadata.get('separation_ratio', 0)*100:.0f}% bird call isolated"""
+        return f"""AUDIO ANALYSIS (SAM-Audio processed):
+• Duration: {self.duration:.2f}s
+• Peak frequency: {self.peak_frequency:.0f} Hz ({freq_desc})
+• Frequency range: {self.freq_range[0]:.0f} - {self.freq_range[1]:.0f} Hz
+• Pattern: {"MELODIC" if self.is_melodic else "MONOTONE"}, {"REPETITIVE ({:.1f}/sec)".format(self.syllable_rate) if self.is_repetitive else "VARIABLE"}
+• Syllables: {self.num_syllables}
+• Quality: SNR {self.snr_db:.0f}dB
+• SAM separation: {self.sam_metadata.get('separation_ratio', 0)*100:.0f}%"""
 
 
 def extract_features(audio: np.ndarray, sr: int, sam_metadata: dict) -> AudioFeatures:
     """Extract comprehensive audio features."""
     duration = len(audio) / sr
-
-    # Spectral analysis
     freqs, psd = signal.welch(audio, sr, nperseg=min(4096, len(audio)))
     peak_freq = freqs[np.argmax(psd)]
 
-    # Spectral moments
     total_power = np.sum(psd) + 1e-10
     centroid = np.sum(freqs * psd) / total_power
     bandwidth = np.sqrt(np.sum(((freqs - centroid) ** 2) * psd) / total_power)
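
The `centroid` and `bandwidth` lines above are the first two spectral moments of the Welch PSD. A standalone check on a pure tone, where the centroid should land near the tone frequency and the bandwidth near zero:

```python
import numpy as np
import scipy.signal as signal

sr = 48000
t = np.linspace(0, 1.0, sr, endpoint=False)
audio = np.sin(2 * np.pi * 2500 * t)  # pure 2.5 kHz tone

freqs, psd = signal.welch(audio, sr, nperseg=4096)
total_power = np.sum(psd) + 1e-10
centroid = np.sum(freqs * psd) / total_power                                # 1st moment
bandwidth = np.sqrt(np.sum(((freqs - centroid) ** 2) * psd) / total_power)  # 2nd moment

print(f"centroid ~ {centroid:.0f} Hz, bandwidth ~ {bandwidth:.0f} Hz")
```
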
@@ -375,27 +456,22 @@ def extract_features(audio: np.ndarray, sr: int, sam_metadata: dict) -> AudioFeatures:
     for c in chunks:
         if len(c) > 512:
             _, cpsd = signal.welch(c, sr, nperseg=min(1024, len(c)))
-            chunk_freqs.append(freqs[np.argmax(cpsd)])
+            if len(cpsd) == len(freqs):
+                chunk_freqs.append(freqs[np.argmax(cpsd)])
     if chunk_freqs:
         is_melodic = np.std(chunk_freqs) / (np.mean(chunk_freqs) + 1e-10) > 0.15
 
-    # SNR
     noise = np.percentile(np.abs(audio), 5)
     sig = np.percentile(np.abs(audio), 95)
     snr = 20 * np.log10((sig + 1e-10) / (noise + 1e-10))
 
     return AudioFeatures(
-        duration=duration,
-        peak_frequency=float(peak_freq),
+        duration=duration, peak_frequency=float(peak_freq),
         freq_range=(float(freq_low), float(freq_high)),
-        num_syllables=num_syl,
-        syllable_rate=float(syl_rate),
-        is_melodic=is_melodic,
-        is_repetitive=syl_rate > 3,
-        snr_db=float(snr),
-        spectral_centroid=float(centroid),
-        spectral_bandwidth=float(bandwidth),
-        sam_metadata=sam_metadata
+        num_syllables=num_syl, syllable_rate=float(syl_rate),
+        is_melodic=is_melodic, is_repetitive=syl_rate > 3,
+        snr_db=float(snr), spectral_centroid=float(centroid),
+        spectral_bandwidth=float(bandwidth), sam_metadata=sam_metadata
     )
 
 
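
The SNR estimate above treats the 5th percentile of |x| as the noise floor and the 95th as the signal level, which sidesteps any explicit noise segmentation. A worked example on a noisy tone:

```python
import numpy as np

rng = np.random.default_rng(0)
sr = 48000
t = np.linspace(0, 1.0, sr, endpoint=False)
audio = np.sin(2 * np.pi * 3000 * t) + 0.05 * rng.standard_normal(sr)

noise = np.percentile(np.abs(audio), 5)  # quietest samples approximate the noise floor
sig = np.percentile(np.abs(audio), 95)   # loudest samples approximate the signal level
snr_db = 20 * np.log10((sig + 1e-10) / (noise + 1e-10))
print(f"SNR ~ {snr_db:.1f} dB")          # roughly 20 dB for this mix
```
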
@@ -420,154 +496,103 @@ def preprocess_audio(audio_data: np.ndarray, sr: int) -> Tuple[np.ndarray, int]:
     return audio_data, sr
 
 
-# ================== IMAGE ANALYSIS
+# ================== IMAGE ANALYSIS ==================
 
 def analyze_image_features(image: np.ndarray) -> Dict:
-    """
-    YOLO-inspired image feature extraction.
-    Analyzes colors, patterns, shapes for bird identification.
-    """
+    """YOLO-inspired image feature extraction."""
     if len(image.shape) != 3 or image.shape[2] < 3:
         return {"error": "Invalid image"}
 
     h, w = image.shape[:2]
-
-    # Convert to different color spaces
     r, g, b = image[:,:,0], image[:,:,1], image[:,:,2]
 
-    # Dominant colors analysis
     colors = []
     color_regions = []
 
-    regions = {
-        "upper": image[:h//3, :, :],
-        "middle": image[h//3:2*h//3, :, :],
-        "lower": image[2*h//3:, :, :]
-    }
+    regions = {"upper": image[:h//3, :, :], "middle": image[h//3:2*h//3, :, :], "lower": image[2*h//3:, :, :]}
 
     for region_name, region in regions.items():
         rr, rg, rb = np.mean(region[:,:,0]), np.mean(region[:,:,1]), np.mean(region[:,:,2])
-
-        # Detect specific colors
         region_colors = []
-        if rr < 60 and rg < 60 and rb < 60:
-        if
-            region_colors.append("green")
-        if rb > rr * 1.2 and rb > rg * 1.1:
-            region_colors.append("blue")
+
+        if rr > 180 and rg > 180 and rb > 180: region_colors.append("white")
+        if rr < 60 and rg < 60 and rb < 60: region_colors.append("black")
+        if rg > rr * 1.2 and rg > rb * 1.2: region_colors.append("green")
+        if rb > rr * 1.2 and rb > rg * 1.1: region_colors.append("blue")
         if rr > rg * 1.3 and rr > rb * 1.3:
-            if rr > 200
-            else:
-                region_colors.append("brown")
-        if rr > 150 and rg > 100 and rb < 80:
-            region_colors.append("yellow/orange")
+            region_colors.append("red" if rr > 200 else "brown")
+        if rr > 150 and rg > 100 and rb < 80: region_colors.append("yellow/orange")
         if abs(rr - rg) < 30 and abs(rg - rb) < 30:
-            if rr > 150
-                region_colors.append("grey/white")
-            else:
-                region_colors.append("grey")
+            region_colors.append("grey/white" if rr > 150 else "grey")
 
         if region_colors:
             color_regions.append(f"{region_name}: {', '.join(region_colors)}")
             colors.extend(region_colors)
 
-    # Unique colors
-    unique_colors = list(set(colors))
-
-    # Pattern detection (simplified)
     gray = 0.299 * r + 0.587 * g + 0.114 * b
     edges = np.abs(np.gradient(gray, axis=0)) + np.abs(np.gradient(gray, axis=1))
     pattern_intensity = np.mean(edges)
 
-    has_stripes = pattern_intensity > 20
-    has_spots = False  # Would need more sophisticated detection
-
-    # Size estimation from aspect ratio
-    aspect = w / h
-    size_guess = "medium"
-    if aspect > 1.5:
-        size_guess = "long-tailed"
-    elif aspect < 0.7:
-        size_guess = "compact/round"
-
     return {
-        "colors":
+        "colors": list(set(colors)),
         "color_regions": color_regions,
-        "has_patterns":
-        "size_hint":
-        "pattern_intensity": pattern_intensity
+        "has_patterns": pattern_intensity > 20,
+        "size_hint": "long-tailed" if w/h > 1.5 else "compact" if w/h < 0.7 else "medium"
     }
 
 
 # ================== LLM PROMPTS ==================
 
-SYSTEM_PROMPT = """You are an expert ornithologist
+SYSTEM_PROMPT = """You are an expert ornithologist with encyclopedic knowledge of 10,000+ bird species worldwide, specializing in Indian birds (1,300+ species).
 
-CRITICAL
-1. Identify birds based ONLY on the provided
-2.
-3.
-4.
-5. Consider geographic context (India-focused)
+CRITICAL REQUIREMENTS:
+1. Identify birds based ONLY on the provided features
+2. ALWAYS include scientific names (REQUIRED for image lookup)
+3. Provide confidence scores (0-100)
+4. Explain your reasoning
 
+RESPOND IN VALID JSON:
 {
   "birds": [
     {
-      "name": "Common
+      "name": "Common Name",
       "scientific_name": "Genus species",
       "confidence": 85,
-      "reasoning": "
+      "reasoning": "Why this bird matches"
     }
   ],
-  "analysis": "
-}
-
-IMPORTANT: The scientific_name is REQUIRED and must be accurate - it's used to fetch the correct bird image."""
+  "analysis": "Brief overall analysis"
+}"""
 
 
 def parse_llm_response(response: str) -> Tuple[List[Dict], str]:
     """Parse LLM JSON response."""
-    birds = []
-    analysis = ""
-
+    birds, analysis = [], ""
     if not response:
-        return birds, "No response
+        return birds, "No response"
 
-    # Try to extract JSON
     try:
-        # Find JSON block
         json_match = re.search(r'\{[\s\S]*\}', response)
         if json_match:
             data = json.loads(json_match.group())
             birds = data.get("birds", [])
             analysis = data.get("analysis", "")
-    except
-        # Try to parse structured text
+    except:
         pass
-
     return birds, analysis
 
 
 def format_results(birds: List[Dict], analysis: str, extra_info: str = "") -> str:
-    """Format results with
-    output = "## 🐦 Birds Identified
+    """Format results with dynamic images."""
+    output = "## 🐦 **Birds Identified**\n\n"
 
     if analysis:
         output += f"*{analysis}*\n\n"
     if extra_info:
-        output += f"{extra_info}\n\n"
+        output += f"📊 {extra_info}\n\n"
 
     if not birds:
-        output
-        return output
+        return output + "⚠️ **No birds identified.** Please try with clearer audio/image.\n"
 
     for i, bird in enumerate(birds, 1):
         name = bird.get("name", "Unknown")
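
`parse_llm_response` tolerates prose around the model's JSON by grabbing the outermost `{...}` span with a regex before handing it to `json.loads`. The same extraction in isolation:

```python
import json
import re

response = (
    "Sure! Here is my identification:\n"
    '{"birds": [{"name": "Asian Koel", "scientific_name": "Eudynamys scolopaceus", '
    '"confidence": 90, "reasoning": "low repetitive whistle"}], "analysis": "melodic call"}\n'
    "Hope that helps."
)

birds, analysis = [], ""
match = re.search(r'\{[\s\S]*\}', response)  # greedy: spans the outermost braces
if match:
    try:
        data = json.loads(match.group())
        birds = data.get("birds", [])
        analysis = data.get("analysis", "")
    except json.JSONDecodeError:
        pass

print(birds[0]["name"], "|", analysis)       # Asian Koel | melodic call
```
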
@@ -575,264 +600,336 @@ def format_results(birds: List[Dict], analysis: str, extra_info: str = "") -> str:
         conf = bird.get("confidence", 0)
         reason = bird.get("reasoning", "")
 
-        # DYNAMIC image fetch - NO HARDCODING
         img_url = get_wikipedia_image(name, scientific)
         badge = "🟢 HIGH" if conf >= 80 else "🟡 MEDIUM" if conf >= 60 else "🔴 LOW"
 
-        output += f"""
----
+        output += f"""---
 
-### {i}. **{name}**
+### {i}. **{name}** – {conf}% {badge}
 
 
 
-**Scientific
+**Scientific:** *{scientific}*
 
-**
+**Reasoning:** {reason}
 
 """
     return output
 
 
-# ================== MAIN FUNCTIONS ==================
+# ================== MAIN FUNCTIONS (WITH STREAMING) ==================
 
-def
-    """Identify bird from audio."""
+def identify_audio_stream(audio, location: str = "", month: str = "") -> Generator[str, None, None]:
+    """Identify bird from audio with streaming."""
     if audio is None:
+        yield "## ⚠️ Please record or upload audio first"
+        return
 
     status = get_llm_status()
+    yield f"## 🔄 Processing audio...\n\n**LLM:** {status}"
 
     try:
         sr, audio_data = audio
         audio_data, sr = preprocess_audio(audio_data, sr)
 
+        yield f"## 🔄 Applying SAM-Audio preprocessing...\n\n**LLM:** {status}"
+
         bird_audio, sam_metadata = sam_audio.separate_bird_calls(audio_data)
         multi_sources = sam_audio.detect_multiple_birds(bird_audio)
 
+        yield f"## 🔄 Extracting features...\n\nSAM separation: {sam_metadata['separation_ratio']*100:.0f}%\n\n**LLM:** {status}"
+
         features = extract_features(bird_audio, sr, sam_metadata)
 
-        prompt = f"""Identify the bird(s) in this audio recording:
+        prompt = f"""Identify the bird(s) in this recording:
 
 {features.to_prompt()}
 
 """
         if location:
-            prompt += f"
+            prompt += f"Location: {location}\n"
         if month:
-            prompt += f"
+            prompt += f"Month: {month}\n"
         if len(multi_sources) > 1:
-            prompt += f"\
+            prompt += f"\nMultiple frequency bands active ({len(multi_sources)}) - possibly multiple birds!\n"
 
-        prompt += "\
+        prompt += "\nProvide scientific names for all matches."
 
-        birds, analysis = parse_llm_response(response)
+        yield f"## 🔄 Consulting {status}...\n\n{features.to_prompt()}"
+
+        # Stream the response
+        full_response = ""
+        for chunk in call_ollama_stream(prompt, SYSTEM_PROMPT):
+            full_response = chunk
+            yield f"## 🔄 LLM thinking...\n\n```\n{chunk[:500]}...\n```"
+
+        # Parse and format final result
+        birds, analysis = parse_llm_response(full_response)
+        extra_info = f"**SAM-Audio:** {sam_metadata['separation_ratio']*100:.0f}% | **LLM:** {status}"
 
         if birds:
+            yield format_results(birds, analysis, extra_info)
         else:
+            yield f"""## ⚠️ Could not identify bird
 
-**Audio Features
+**Audio Features:**
 {features.to_prompt()}
 
 **LLM Response:**
+```
+{full_response[:800] if full_response else 'No response'}
+```
 
 **Status:** {status}
-
-Please ensure Ollama is running with llama3.2 or phi4 model."""
+"""
 
     except Exception as e:
+        yield f"## ❌ Error: {str(e)}\n\n**LLM:** {status}"
 
 
-def
-    """Identify bird from image
+def identify_image_stream(image) -> Generator[str, None, None]:
+    """Identify bird from image with streaming."""
     if image is None:
+        yield "## ⚠️ Please upload an image first"
+        return
 
     status = get_llm_status()
+    yield f"## 🔄 Analyzing image...\n\n**LLM:** {status}"
 
     try:
         img = np.array(image) if not isinstance(image, np.ndarray) else image
-
-        # YOLO-style feature extraction
         features = analyze_image_features(img)
 
         if "error" in features:
+            yield f"## ⚠️ {features['error']}"
+            return
+
+        yield f"## 🎨 Colors detected: {', '.join(features['colors'])}\n\n**LLM:** {status}"
 
-        prompt = f"""Identify the bird in this image based on visual analysis:
+        prompt = f"""Identify the bird based on visual analysis:
 
-- Body shape: {features['size_hint']}
+DETECTED FEATURES:
+• Colors: {', '.join(features['colors']) if features['colors'] else 'unclear'}
+• Regions: {'; '.join(features['color_regions'])}
+• Patterns: {'Yes' if features['has_patterns'] else 'No'}
+• Shape: {features['size_hint']}
 
-Consider color patterns, size, and shape carefully.
-IMPORTANT: Provide accurate scientific names for each bird."""
+Identify Indian bird species that match. Provide scientific names."""
 
+        full_response = ""
+        for chunk in call_ollama_stream(prompt, SYSTEM_PROMPT):
+            full_response = chunk
+            yield f"## 🔄 LLM analyzing...\n\n```\n{chunk[:500]}...\n```"
 
+        birds, analysis = parse_llm_response(full_response)
+        extra_info = f"**Colors:** {', '.join(features['colors'])} | **LLM:** {status}"
 
         if birds:
+            yield format_results(birds, analysis, extra_info)
         else:
-
-**Detected Colors:** {', '.join(features['colors'])}
-**Color Regions:** {'; '.join(features['color_regions'])}
-
-**LLM Response:** {response[:500] if response else 'No response'}
-
-**Status:** {status}"""
+            yield f"## ⚠️ Could not identify bird\n\n**Colors:** {', '.join(features['colors'])}\n\n**LLM:** {status}"
 
     except Exception as e:
+        yield f"## ❌ Error: {str(e)}\n\n**LLM:** {status}"
 
 
-def
-    """Identify bird from description."""
+def identify_description_stream(description: str) -> Generator[str, None, None]:
+    """Identify bird from description with streaming."""
     if not description or len(description.strip()) < 5:
+        yield "## ⚠️ Please enter a description"
+        return
 
     status = get_llm_status()
+    yield f"## 🔄 Processing description...\n\n**LLM:** {status}"
 
     prompt = f"""Identify the bird(s) from this description:
 
-{description}
+"{description}"
 
-Focus on Indian birds.
-IMPORTANT: Provide accurate scientific names for image lookup."""
+Focus on Indian birds. Provide scientific names."""
 
+    full_response = ""
+    for chunk in call_ollama_stream(prompt, SYSTEM_PROMPT):
+        full_response = chunk
+        yield f"## 🔄 LLM thinking...\n\n```\n{chunk[:500]}...\n```"
 
+    birds, analysis = parse_llm_response(full_response)
 
     if birds:
+        yield format_results(birds, analysis, f"**LLM:** {status}")
    else:
-
-**LLM Response:** {response[:500] if response else 'No response'}
-
-**Status:** {status}"""
+        yield f"## ⚠️ Could not identify bird\n\n**LLM:** {status}"
 
 
 # ================== GRADIO UI ==================
 
-with gr.Blocks(
+with gr.Blocks(
+    title="🐦 BirdSense Pro",
+    css=CUSTOM_CSS
+) as demo:
 
+    # Header
     gr.HTML("""
-    <div
-        <h1
-        <p style="color: #94a3b8; font-size: 1.
-        <p style="color: #64748b; font-size:
+    <div class="header-banner">
+        <h1>🐦 BirdSense Pro</h1>
+        <p style="color: #94a3b8; font-size: 1.2rem; font-weight: 500;">META SAM-Audio + Llama3.2/Phi4 LLM</p>
+        <p style="color: #64748b; font-size: 1rem;">Dynamic Wikipedia Images • No Hardcoding • 10,000+ Species</p>
     </div>
     """)
 
+    # LLM Status
+    gr.HTML(f"""
+    <div class="llm-status">
+        <strong>Current LLM:</strong> {get_llm_status()}
+    </div>
+    """)
 
+    # Tabs
     with gr.Tabs():
+
+        # Audio Tab
         with gr.Tab("🎤 Audio Identification"):
             gr.Markdown("""
-
-            1. SAM-Audio separates bird calls from background noise
-            2. Features (frequency, syllables, pattern) are extracted
-            3. LLM identifies matching species from 10,000+ birds
-            4. Images are fetched dynamically from Wikipedia
+            ### How it works:
+            1. **SAM-Audio** separates bird calls from background noise
+            2. **Features** (frequency, syllables, pattern) are extracted
+            3. **LLM** identifies matching species from 10,000+ birds
+            4. **Images** are fetched dynamically from Wikipedia
             """)
 
             with gr.Row():
                 with gr.Column(scale=1):
+                    audio_input = gr.Audio(
+                        sources=["microphone", "upload"],
+                        type="numpy",
+                        label="🎤 Record or Upload Audio"
+                    )
                     with gr.Row():
+                        location_input = gr.Textbox(
+                            label="📍 Location (optional)",
+                            placeholder="e.g., Western Ghats, Mumbai"
+                        )
+                        month_input = gr.Dropdown(
+                            label="📅 Month",
+                            choices=["", "January", "February", "March", "April", "May", "June",
+                                     "July", "August", "September", "October", "November", "December"]
+                        )
+                    audio_button = gr.Button(
+                        "🔍 Identify Bird",
+                        variant="primary",
+                        size="lg",
+                        elem_classes=["primary-btn"]
+                    )
 
                 with gr.Column(scale=2):
+                    audio_output = gr.Markdown(
+                        value="*Results will appear here after identification...*",
+                        elem_classes=["result-box"]
+                    )
 
+            # Connect button to function
+            audio_button.click(
+                fn=identify_audio_stream,
+                inputs=[audio_input, location_input, month_input],
+                outputs=audio_output
+            )
+
+        # Image Tab
         with gr.Tab("📷 Image Identification"):
             gr.Markdown("""
-
-            - Extracts colors from different regions (head, body, tail)
-            - Detects patterns and shapes
-            - LLM matches to bird species
+            ### YOLO-style visual analysis:
+            - Extracts **colors** from different regions (head, body, tail)
+            - Detects **patterns** and shapes
+            - **LLM** matches to bird species
             """)
 
             with gr.Row():
                 with gr.Column(scale=1):
+                    image_input = gr.Image(
+                        sources=["upload", "webcam"],
+                        type="numpy",
+                        label="📷 Upload or Capture Image"
+                    )
+                    image_button = gr.Button(
+                        "🔍 Identify Bird",
+                        variant="primary",
+                        size="lg",
+                        elem_classes=["primary-btn"]
+                    )
 
                 with gr.Column(scale=2):
+                    image_output = gr.Markdown(
+                        value="*Results will appear here...*",
+                        elem_classes=["result-box"]
+                    )
 
+            image_button.click(
+                fn=identify_image_stream,
+                inputs=[image_input],
+                outputs=image_output
+            )
 
+        # Description Tab
         with gr.Tab("📝 Description"):
+            gr.Markdown("""
+            ### Describe the bird you saw:
+            Include colors, size, call sounds, behavior, habitat...
+            """)
+
             with gr.Row():
                 with gr.Column(scale=1):
-                        label="
-                        lines=
-                        placeholder="Example: Small green bird with red forehead, making repetitive 'tuk-tuk' sound, seen in garden"
+                    desc_input = gr.Textbox(
+                        label="📝 Bird Description",
+                        lines=5,
+                        placeholder="Example: Small green bird with red forehead, making repetitive 'tuk-tuk' sound, seen in garden near fruit trees"
                     )
+                    desc_button = gr.Button(
+                        "🔍 Identify Bird",
+                        variant="primary",
+                        size="lg",
+                        elem_classes=["primary-btn"]
+                    )
 
                 with gr.Column(scale=2):
+                    desc_output = gr.Markdown(
+                        value="*Results will appear here...*",
+                        elem_classes=["result-box"]
+                    )
 
+            desc_button.click(
+                fn=identify_description_stream,
+                inputs=[desc_input],
+                outputs=desc_output
+            )
 
+        # About Tab
         with gr.Tab("ℹ️ About"):
             gr.Markdown("""
-            ## 🐦 BirdSense Pro
-
-            ###
-            - **Images
-            - **
-            - **
+            ## 🐦 BirdSense Pro
+
+            ### Key Features
+            - **No Hardcoding** – Images fetched dynamically from Wikipedia using scientific names
+            - **10,000+ Species** – LLM has knowledge of birds worldwide
+            - **SAM-Audio** – Isolates bird calls from background noise
+            - **Multi-Modal** – Audio, image, and text identification
+
+            ### Models
+            | Component | Technology |
+            |-----------|------------|
+            | Audio Preprocessing | META SAM-Audio (500-10000 Hz isolation) |
+            | LLM (Local) | Llama3.2 / Phi4 via Ollama |
+            | LLM (Cloud) | Mistral-7B via HuggingFace |
+            | Image Analysis | YOLO-inspired color/pattern extraction |
 
             ### CSCR Initiative
             Open-source bird identification for researchers in India.
             """)
 
+    # Footer
     gr.HTML("""
-    <div
-        <p style="color: #4ade80;">🐦 BirdSense Pro
-        <p style="color: #64748b;
+    <div class="footer">
+        <p style="color: #4ade80; font-size: 1.1rem; font-weight: 600;">🐦 BirdSense Pro – CSCR Initiative</p>
+        <p style="color: #64748b;">Dynamic Images • No Hardcoding • LLM-Powered</p>
     </div>
     """)
 
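
The `.click(fn=identify_*_stream, ...)` wiring above relies on Gradio's support for generator handlers: every `yield` replaces the bound component's value, which is what produces the progressive status updates. The pattern in miniature, assuming `gradio` is installed:

```python
import time

import gradio as gr

def slow_echo(question: str):
    """Generator handler: each yield re-renders the output component."""
    text = ""
    for word in f"Echo: {question}".split():
        text += word + " "
        time.sleep(0.2)  # stand-in for waiting on an LLM token
        yield text

with gr.Blocks() as demo:
    box = gr.Textbox(label="Question")
    out = gr.Markdown()
    gr.Button("Ask").click(fn=slow_echo, inputs=box, outputs=out)

if __name__ == "__main__":
    demo.launch()
```
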
@@ -840,5 +937,5 @@ Open-source bird identification for researchers in India.
 if __name__ == "__main__":
     print(f"\n🐦 BirdSense Pro")
     print(f"LLM: {get_llm_status()}")
-    print("Starting server
+    print("Starting server on http://localhost:7860")
     demo.launch(server_name="0.0.0.0", server_port=7860)