sohiyiy committed on
Commit 0e96d6b · verified · 1 Parent(s): b2f9387

Upload folder using huggingface_hub

Files changed (1)
  1. app.py +406 -309
app.py CHANGED
@@ -16,11 +16,12 @@ import numpy as np
16
  import scipy.signal as signal
17
  from scipy.ndimage import gaussian_filter1d
18
  from dataclasses import dataclass
19
- from typing import Optional, Tuple, List, Dict
20
  import json
21
  import requests
22
  import re
23
  import urllib.parse
 
24
 
25
  # ================== CONFIG ==================
26
  SAMPLE_RATE = 48000
@@ -30,28 +31,118 @@ OLLAMA_URL = "http://localhost:11434"
30
  OLLAMA_MODELS = ["llama3.2", "phi4:latest", "qwen2.5:3b"] # Priority order
31
 
32
  # HuggingFace Inference API (for cloud deployment)
33
- # Token loaded from environment variable or HuggingFace Spaces secrets
34
- import os
35
  HF_API_TOKEN = os.environ.get("HF_TOKEN", "")
36
  HF_API_MODELS = [
37
  "mistralai/Mistral-7B-Instruct-v0.3",
38
  "google/flan-t5-xxl",
39
- "facebook/opt-1.3b", # Free, no auth needed
40
  ]
41
 
42
 
43
- # ================== DYNAMIC IMAGE SEARCH ==================
44
- """
45
- NO HARDCODED IMAGES!
46
- Fetches bird images dynamically from Wikipedia/Wikimedia Commons
47
  """
48
 
 
 
 
49
  def get_wikipedia_image(bird_name: str, scientific_name: str = "") -> str:
50
- """
51
- Dynamically fetch bird image from Wikipedia.
52
- No hardcoding - searches based on LLM output.
53
- """
54
- # Try scientific name first (more accurate)
55
  search_terms = []
56
  if scientific_name:
57
  search_terms.append(scientific_name.replace(" ", "_"))
@@ -60,7 +151,6 @@ def get_wikipedia_image(bird_name: str, scientific_name: str = "") -> str:
60
 
61
  for term in search_terms:
62
  try:
63
- # Wikipedia API to get page image
64
  wiki_url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{urllib.parse.quote(term)}"
65
  resp = requests.get(wiki_url, timeout=5, headers={"User-Agent": "BirdSense/1.0"})
66
 
@@ -68,51 +158,40 @@ def get_wikipedia_image(bird_name: str, scientific_name: str = "") -> str:
68
  data = resp.json()
69
  if "thumbnail" in data and "source" in data["thumbnail"]:
70
  img_url = data["thumbnail"]["source"]
71
- # Get higher resolution
72
  img_url = img_url.replace("/220px-", "/400px-").replace("/320px-", "/400px-")
73
  return img_url
74
  elif "originalimage" in data and "source" in data["originalimage"]:
75
  return data["originalimage"]["source"]
76
- except Exception as e:
77
  continue
78
 
79
- # Fallback: Search Wikimedia Commons
80
  try:
81
- commons_url = f"https://commons.wikimedia.org/w/api.php"
82
  params = {
83
- "action": "query",
84
- "format": "json",
85
- "list": "search",
86
- "srsearch": f"{bird_name} bird",
87
- "srnamespace": "6", # File namespace
88
- "srlimit": "1"
89
  }
90
  resp = requests.get(commons_url, params=params, timeout=5)
91
  if resp.status_code == 200:
92
  data = resp.json()
93
  if data.get("query", {}).get("search"):
94
  file_title = data["query"]["search"][0]["title"]
95
- # Get actual image URL
96
- file_url = f"https://commons.wikimedia.org/w/api.php"
97
  file_params = {
98
- "action": "query",
99
- "format": "json",
100
- "titles": file_title,
101
- "prop": "imageinfo",
102
- "iiprop": "url",
103
- "iiurlwidth": "400"
104
  }
105
- file_resp = requests.get(file_url, params=file_params, timeout=5)
106
  if file_resp.status_code == 200:
107
- file_data = file_resp.json()
108
- pages = file_data.get("query", {}).get("pages", {})
109
  for page in pages.values():
110
  if "imageinfo" in page:
111
  return page["imageinfo"][0].get("thumburl", page["imageinfo"][0].get("url", ""))
112
  except:
113
  pass
114
 
115
- # Final fallback - generic bird silhouette
116
  return "https://upload.wikimedia.org/wikipedia/commons/thumb/5/5a/Bird_icon.svg/200px-Bird_icon.svg.png"
117
 
118
 
@@ -174,12 +253,7 @@ class SAMAudioProcessor:
174
  f, t, Zxx = signal.stft(audio, self.sr, nperseg=2048)
175
  magnitude = np.abs(Zxx)
176
 
177
- bands = [
178
- ("low_freq", 500, 2000),
179
- ("mid_freq", 2000, 5000),
180
- ("high_freq", 5000, 10000),
181
- ]
182
-
183
  detected = []
184
  for band_name, low, high in bands:
185
  band_idx = (f >= low) & (f <= high)
@@ -210,8 +284,40 @@ def get_available_ollama_model() -> Optional[str]:
210
 return None
211

212
213
  def call_ollama(prompt: str, system: str = None) -> Optional[str]:
214
- """Call local Ollama LLM with best available model."""
215
  model = get_available_ollama_model()
216
  if not model:
217
  return None
@@ -237,8 +343,6 @@ def call_ollama(prompt: str, system: str = None) -> Optional[str]:
237
  def call_hf_inference(prompt: str, system: str = None) -> Optional[str]:
238
  """Call HuggingFace Inference API."""
239
  full_prompt = f"{system}\n\n{prompt}" if system else prompt
240
-
241
- # Truncate prompt if too long
242
  if len(full_prompt) > 4000:
243
  full_prompt = full_prompt[:4000]
244
 
@@ -251,11 +355,7 @@ def call_hf_inference(prompt: str, system: str = None) -> Optional[str]:
251
 
252
  payload = {
253
  "inputs": full_prompt,
254
- "parameters": {
255
- "max_new_tokens": 1000,
256
- "temperature": 0.3,
257
- "return_full_text": False
258
- }
259
  }
260
 
261
  resp = requests.post(url, headers=headers, json=payload, timeout=90)
@@ -267,29 +367,17 @@ def call_hf_inference(prompt: str, system: str = None) -> Optional[str]:
267
  if text and len(text) > 20:
268
  return text
269
  elif isinstance(result, dict):
270
- text = result.get("generated_text", "")
271
- if text:
272
- return text
273
- elif resp.status_code == 503:
274
- continue # Model loading
275
- elif resp.status_code == 401:
276
- continue # Auth required, try next
277
-
278
  except Exception as e:
279
- print(f"HF API error for {model}: {e}")
280
  continue
281
-
282
  return None
283
 
284
 
285
  def call_llm(prompt: str, system: str = None) -> Optional[str]:
286
  """Call LLM - Ollama first, HuggingFace fallback."""
287
- # Try Ollama first
288
  result = call_ollama(prompt, system)
289
  if result:
290
  return result
291
-
292
- # Fallback to HuggingFace
293
  return call_hf_inference(prompt, system)
294
 
295
 
@@ -318,35 +406,28 @@ class AudioFeatures:
318
  sam_metadata: dict
319
 
320
  def to_prompt(self) -> str:
321
- freq_desc = "very low (<500Hz, large bird like crow/cuckoo)" if self.peak_frequency < 500 else \
322
  "low (500-1500Hz, koel/coucal)" if self.peak_frequency < 1500 else \
323
- "medium (1500-4000Hz, typical songbird)" if self.peak_frequency < 4000 else \
324
- "high (4000-7000Hz, warbler/sunbird)" if self.peak_frequency < 7000 else \
325
- "very high (>7000Hz, alarm call/small bird)"
326
 
327
- return f"""AUDIO ANALYSIS (after SAM-Audio bird call separation):
328
- - Duration: {self.duration:.2f} seconds
329
- - Peak frequency: {self.peak_frequency:.0f} Hz ({freq_desc})
330
- - Frequency range: {self.freq_range[0]:.0f} - {self.freq_range[1]:.0f} Hz
331
- - Spectral centroid: {self.spectral_centroid:.0f} Hz
332
- - Spectral bandwidth: {self.spectral_bandwidth:.0f} Hz
333
- - Call pattern: {"MELODIC (varying pitch)" if self.is_melodic else "MONOTONE (steady pitch)"}
334
- - Repetition: {"REPETITIVE ({:.1f} syllables/sec)".format(self.syllable_rate) if self.is_repetitive else "VARIABLE pattern"}
335
- - Syllable count: {self.num_syllables}
336
- - Signal quality: SNR {self.snr_db:.1f}dB {"(excellent)" if self.snr_db > 20 else "(good)" if self.snr_db > 10 else "(noisy)"}
337
-
338
- SAM-Audio separation: {self.sam_metadata.get('separation_ratio', 0)*100:.0f}% bird call isolated"""
339
 
340
 
341
  def extract_features(audio: np.ndarray, sr: int, sam_metadata: dict) -> AudioFeatures:
342
  """Extract comprehensive audio features."""
343
  duration = len(audio) / sr
344
-
345
- # Spectral analysis
346
  freqs, psd = signal.welch(audio, sr, nperseg=min(4096, len(audio)))
347
  peak_freq = freqs[np.argmax(psd)]
348
 
349
- # Spectral moments
350
  total_power = np.sum(psd) + 1e-10
351
  centroid = np.sum(freqs * psd) / total_power
352
  bandwidth = np.sqrt(np.sum(((freqs - centroid) ** 2) * psd) / total_power)
@@ -375,27 +456,22 @@ def extract_features(audio: np.ndarray, sr: int, sam_metadata: dict) -> AudioFea
375
  for c in chunks:
376
  if len(c) > 512:
377
  _, cpsd = signal.welch(c, sr, nperseg=min(1024, len(c)))
378
- chunk_freqs.append(freqs[np.argmax(cpsd)] if len(cpsd) == len(freqs) else peak_freq)
 
379
  if chunk_freqs:
380
  is_melodic = np.std(chunk_freqs) / (np.mean(chunk_freqs) + 1e-10) > 0.15
381
 
382
- # SNR
383
  noise = np.percentile(np.abs(audio), 5)
384
  sig = np.percentile(np.abs(audio), 95)
385
  snr = 20 * np.log10((sig + 1e-10) / (noise + 1e-10))
386
 
387
  return AudioFeatures(
388
- duration=duration,
389
- peak_frequency=float(peak_freq),
390
  freq_range=(float(freq_low), float(freq_high)),
391
- num_syllables=num_syl,
392
- syllable_rate=float(syl_rate),
393
- is_melodic=is_melodic,
394
- is_repetitive=syl_rate > 3,
395
- snr_db=float(snr),
396
- spectral_centroid=float(centroid),
397
- spectral_bandwidth=float(bandwidth),
398
- sam_metadata=sam_metadata
399
  )
400
 
401
 
@@ -420,154 +496,103 @@ def preprocess_audio(audio_data: np.ndarray, sr: int) -> Tuple[np.ndarray, int]:
420
  return audio_data, sr
421
 
422
 
423
- # ================== IMAGE ANALYSIS (YOLO-style) ==================
424
 
425
  def analyze_image_features(image: np.ndarray) -> Dict:
426
- """
427
- YOLO-inspired image feature extraction.
428
- Analyzes colors, patterns, shapes for bird identification.
429
- """
430
  if len(image.shape) != 3 or image.shape[2] < 3:
431
  return {"error": "Invalid image"}
432
 
433
  h, w = image.shape[:2]
434
-
435
- # Convert to different color spaces
436
  r, g, b = image[:,:,0], image[:,:,1], image[:,:,2]
437
 
438
- # Dominant colors analysis
439
  colors = []
440
  color_regions = []
441
 
442
- # Analyze different regions (head, body, tail approximation)
443
- regions = {
444
- "upper": image[:h//3, :, :],
445
- "middle": image[h//3:2*h//3, :, :],
446
- "lower": image[2*h//3:, :, :]
447
- }
448
 
449
  for region_name, region in regions.items():
450
  rr, rg, rb = np.mean(region[:,:,0]), np.mean(region[:,:,1]), np.mean(region[:,:,2])
451
-
452
- # Detect specific colors
453
  region_colors = []
454
- if rr > 180 and rg > 180 and rb > 180:
455
- region_colors.append("white")
456
- if rr < 60 and rg < 60 and rb < 60:
457
- region_colors.append("black")
458
- if rg > rr * 1.2 and rg > rb * 1.2:
459
- region_colors.append("green")
460
- if rb > rr * 1.2 and rb > rg * 1.1:
461
- region_colors.append("blue")
462
  if rr > rg * 1.3 and rr > rb * 1.3:
463
- if rr > 200:
464
- region_colors.append("red")
465
- else:
466
- region_colors.append("brown")
467
- if rr > 150 and rg > 100 and rb < 80:
468
- region_colors.append("yellow/orange")
469
  if abs(rr - rg) < 30 and abs(rg - rb) < 30:
470
- if rr > 150:
471
- region_colors.append("grey/white")
472
- else:
473
- region_colors.append("grey")
474
 
475
  if region_colors:
476
  color_regions.append(f"{region_name}: {', '.join(region_colors)}")
477
  colors.extend(region_colors)
478
 
479
- # Unique colors
480
- unique_colors = list(set(colors))
481
-
482
- # Pattern detection (simplified)
483
  gray = 0.299 * r + 0.587 * g + 0.114 * b
484
  edges = np.abs(np.gradient(gray, axis=0)) + np.abs(np.gradient(gray, axis=1))
485
  pattern_intensity = np.mean(edges)
486
 
487
- has_stripes = pattern_intensity > 20
488
- has_spots = False # Would need more sophisticated detection
489
-
490
- # Size estimation from aspect ratio
491
- aspect = w / h
492
- size_guess = "medium"
493
- if aspect > 1.5:
494
- size_guess = "long-tailed"
495
- elif aspect < 0.7:
496
- size_guess = "compact/round"
497
-
498
  return {
499
- "colors": unique_colors,
500
  "color_regions": color_regions,
501
- "has_patterns": has_stripes,
502
- "size_hint": size_guess,
503
- "pattern_intensity": pattern_intensity
504
  }
505
 
506
 
507
  # ================== LLM PROMPTS ==================
508
 
509
- SYSTEM_PROMPT = """You are an expert ornithologist specializing in bird identification. You have encyclopedic knowledge of 10,000+ bird species worldwide, with particular expertise in Indian birds (1,300+ species).
510
 
511
- CRITICAL RULES:
512
- 1. Identify birds based ONLY on the provided audio/image/description features
513
- 2. List ALL possible matching species with confidence scores
514
- 3. ALWAYS provide scientific names (they are REQUIRED for image lookup)
515
- 4. Be specific about WHY each bird matches the features
516
- 5. Consider geographic context (India-focused)
517
 
518
- Your response MUST be valid JSON in this exact format:
519
  {
520
  "birds": [
521
  {
522
- "name": "Common English Name",
523
  "scientific_name": "Genus species",
524
  "confidence": 85,
525
- "reasoning": "Detailed explanation of why this bird matches"
526
  }
527
  ],
528
- "analysis": "Overall analysis of the recording/image",
529
- "habitat_notes": "Relevant habitat information"
530
- }
531
-
532
- IMPORTANT: The scientific_name is REQUIRED and must be accurate - it's used to fetch the correct bird image."""
533
 
534
 
535
  def parse_llm_response(response: str) -> Tuple[List[Dict], str]:
536
  """Parse LLM JSON response."""
537
- birds = []
538
- analysis = ""
539
-
540
  if not response:
541
- return birds, "No response from LLM"
542
 
543
- # Try to extract JSON
544
  try:
545
- # Find JSON block
546
  json_match = re.search(r'\{[\s\S]*\}', response)
547
  if json_match:
548
  data = json.loads(json_match.group())
549
  birds = data.get("birds", [])
550
  analysis = data.get("analysis", "")
551
- except json.JSONDecodeError:
552
- # Try to parse structured text
553
  pass
554
-
555
  return birds, analysis
556
 
557
 
558
  def format_results(birds: List[Dict], analysis: str, extra_info: str = "") -> str:
559
- """Format results with DYNAMIC images (no hardcoding)."""
560
- output = "## 🐦 Birds Identified\n\n"
561
 
562
  if analysis:
563
  output += f"*{analysis}*\n\n"
564
-
565
  if extra_info:
566
- output += f"{extra_info}\n\n"
567
 
568
  if not birds:
569
- output += "### No birds identified. Please try with clearer audio/image.\n"
570
- return output
571
 
572
  for i, bird in enumerate(birds, 1):
573
  name = bird.get("name", "Unknown")
@@ -575,264 +600,336 @@ def format_results(birds: List[Dict], analysis: str, extra_info: str = "") -> st
575
  conf = bird.get("confidence", 0)
576
  reason = bird.get("reasoning", "")
577
 
578
- # DYNAMIC image fetch - NO HARDCODING
579
  img_url = get_wikipedia_image(name, scientific)
580
-
581
  badge = "🟒 HIGH" if conf >= 80 else "🟑 MEDIUM" if conf >= 60 else "πŸ”΄ LOW"
582
 
583
- output += f"""
584
- ---
585
 
586
- ### {i}. **{name}** ({conf}%) {badge}
587
 
588
  ![{name}]({img_url})
589
 
590
- **Scientific Name:** *{scientific}*
591
 
592
- **Why this bird:** {reason}
593
 
594
  """
595
-
596
  return output
597
 
598
 
599
- # ================== MAIN FUNCTIONS ==================
600
 
601
- def identify_audio(audio, location: str = "", month: str = ""):
602
- """Identify bird from audio."""
603
  if audio is None:
604
- return "### ⚠️ Please record or upload audio"
 
605
 
606
  status = get_llm_status()
 
607
 
608
  try:
609
  sr, audio_data = audio
610
  audio_data, sr = preprocess_audio(audio_data, sr)
611
 
612
- # SAM-Audio preprocessing
 
613
  bird_audio, sam_metadata = sam_audio.separate_bird_calls(audio_data)
614
  multi_sources = sam_audio.detect_multiple_birds(bird_audio)
615
 
616
- # Extract features
 
617
  features = extract_features(bird_audio, sr, sam_metadata)
618
 
619
- # Build prompt
620
- prompt = f"""Identify the bird(s) in this audio recording:
621
 
622
  {features.to_prompt()}
623
 
624
  """
625
  if location:
626
- prompt += f"LOCATION: {location}\n"
627
  if month:
628
- prompt += f"MONTH: {month}\n"
629
-
630
  if len(multi_sources) > 1:
631
- prompt += f"\nNOTE: Multiple frequency bands detected ({len(multi_sources)}) - likely multiple birds calling!\n"
632
 
633
- prompt += "\nIdentify ALL birds that match these audio characteristics. Provide scientific names."
634
 
635
- response = call_llm(prompt, SYSTEM_PROMPT)
636
- birds, analysis = parse_llm_response(response)
637
 
638
- extra_info = f"**πŸ”Š SAM-Audio:** {sam_metadata.get('separation_ratio', 0)*100:.0f}% separation | **LLM:** {status}"
 
 
 
 
 
 
 
 
639
 
640
  if birds:
641
- return format_results(birds, analysis, extra_info)
642
  else:
643
- return f"""### ⚠️ Could not identify bird
644
 
645
- **Audio Features Detected:**
646
  {features.to_prompt()}
647
 
648
- **LLM Response:** {response[:500] if response else 'No response'}
 
650
  **Status:** {status}
651
-
652
- Please ensure Ollama is running with llama3.2 or phi4 model."""
653
 
654
  except Exception as e:
655
- return f"### ❌ Error: {str(e)}\n\n**LLM:** {status}"
656
 
657
 
658
- def identify_image(image):
659
- """Identify bird from image using YOLO-style analysis + LLM."""
660
  if image is None:
661
- return "### ⚠️ Please upload an image"
 
662
 
663
  status = get_llm_status()
 
664
 
665
  try:
666
  img = np.array(image) if not isinstance(image, np.ndarray) else image
667
-
668
- # YOLO-style feature extraction
669
  features = analyze_image_features(img)
670
 
671
  if "error" in features:
672
- return f"### ⚠️ {features['error']}"
 
 
 
673
 
674
- # Build detailed prompt
675
- prompt = f"""Identify the bird in this image based on visual analysis:
676
 
677
- IMAGE ANALYSIS (YOLO-style feature extraction):
678
- - Detected colors: {', '.join(features['colors']) if features['colors'] else 'mixed/unclear'}
679
- - Color distribution by region:
680
- {chr(10).join(' - ' + r for r in features['color_regions'])}
681
- - Pattern detected: {'Yes (striped/patterned)' if features['has_patterns'] else 'No distinct patterns'}
682
- - Body shape: {features['size_hint']}
683
 
684
- Based on these visual features, identify ALL possible Indian bird species that match.
685
- Consider color patterns, size, and shape carefully.
686
- IMPORTANT: Provide accurate scientific names for each bird."""
687
 
688
- response = call_llm(prompt, SYSTEM_PROMPT)
689
- birds, analysis = parse_llm_response(response)
 
 
690
 
691
- extra_info = f"**πŸ“· Visual Analysis:** {', '.join(features['colors'])} | **LLM:** {status}"
 
692
 
693
  if birds:
694
- return format_results(birds, analysis, extra_info)
695
  else:
696
- return f"""### ⚠️ Could not identify bird from image
697
-
698
- **Detected Colors:** {', '.join(features['colors'])}
699
- **Color Regions:** {'; '.join(features['color_regions'])}
700
-
701
- **LLM Response:** {response[:500] if response else 'No response'}
702
-
703
- **Status:** {status}"""
704
 
705
  except Exception as e:
706
- return f"### ❌ Error: {str(e)}\n\n**LLM:** {status}"
707
 
708
 
709
- def identify_description(description: str):
710
- """Identify bird from description."""
711
  if not description or len(description.strip()) < 5:
712
- return "### ⚠️ Please enter a description"
 
713
 
714
  status = get_llm_status()
 
715
 
716
  prompt = f"""Identify the bird(s) from this description:
717
 
718
- USER DESCRIPTION:
719
- {description}
720
 
721
- Focus on Indian birds. Match the description to specific species.
722
- IMPORTANT: Provide accurate scientific names for image lookup."""
723
 
724
- response = call_llm(prompt, SYSTEM_PROMPT)
725
- birds, analysis = parse_llm_response(response)
 
 
726
 
727
- extra_info = f"**πŸ“ Description Match** | **LLM:** {status}"
728
 
729
  if birds:
730
- return format_results(birds, analysis, extra_info)
731
  else:
732
- return f"""### ⚠️ Could not identify bird
733
-
734
- **LLM Response:** {response[:500] if response else 'No response'}
735
-
736
- **Status:** {status}"""
737
 
738
 
739
  # ================== GRADIO UI ==================
740
 
741
- with gr.Blocks(title="🐦 BirdSense Pro", theme=gr.themes.Soft()) as demo:
 
 
 
742
 
 
743
  gr.HTML("""
744
- <div style="text-align: center; background: linear-gradient(135deg, #1a4d2e 0%, #2d5a3e 50%, #1a4d2e 100%); padding: 2rem; border-radius: 16px; margin-bottom: 1rem;">
745
- <h1 style="color: #4ade80; font-size: 2.5rem; margin: 0;">🐦 BirdSense Pro</h1>
746
- <p style="color: #94a3b8; font-size: 1.1rem;">META SAM-Audio + Llama3.2/Phi4 LLM</p>
747
- <p style="color: #64748b; font-size: 0.9rem;">Dynamic Wikipedia Images β€’ No Hardcoding β€’ 10,000+ Species</p>
748
  </div>
749
  """)
750
 
751
- gr.Markdown(f"### Current LLM: {get_llm_status()}")
 
 
 
 
 
752
 
 
753
  with gr.Tabs():
 
 
754
  with gr.Tab("🎀 Audio Identification"):
755
  gr.Markdown("""
756
- **How it works:**
757
- 1. SAM-Audio separates bird calls from background noise
758
- 2. Features (frequency, syllables, pattern) are extracted
759
- 3. LLM identifies matching species from 10,000+ birds
760
- 4. Images are fetched dynamically from Wikipedia
761
  """)
762
 
763
  with gr.Row():
764
  with gr.Column(scale=1):
765
- audio_in = gr.Audio(sources=["microphone", "upload"], type="numpy", label="🎤 Record or Upload")
766
  with gr.Row():
767
- loc = gr.Textbox(label="πŸ“ Location", placeholder="Western Ghats, Mumbai...")
768
- month = gr.Dropdown(label="πŸ“… Month", choices=[""] + [
769
- "January", "February", "March", "April", "May", "June",
770
- "July", "August", "September", "October", "November", "December"
771
- ])
772
- audio_btn = gr.Button("πŸ” Identify Bird", variant="primary", size="lg")
 
 
 
 
 
 
 
 
 
773
 
774
  with gr.Column(scale=2):
775
- audio_out = gr.Markdown()
 
 
 
776
 
777
- audio_btn.click(identify_audio, [audio_in, loc, month], audio_out)
778
-
 
 
 
 
 
 
779
  with gr.Tab("πŸ“· Image Identification"):
780
  gr.Markdown("""
781
- **YOLO-style visual analysis:**
782
- - Extracts colors from different regions (head, body, tail)
783
- - Detects patterns and shapes
784
- - LLM matches to bird species
785
  """)
786
 
787
  with gr.Row():
788
  with gr.Column(scale=1):
789
- img_in = gr.Image(sources=["upload", "webcam"], type="numpy", label="📷 Upload or Capture")
790
- img_btn = gr.Button("🔍 Identify Bird", variant="primary", size="lg")
791
  with gr.Column(scale=2):
792
- img_out = gr.Markdown()
 
 
 
793
 
794
- img_btn.click(identify_image, [img_in], img_out)
795
 
 
796
  with gr.Tab("πŸ“ Description"):
 
 
 
 
 
797
  with gr.Row():
798
  with gr.Column(scale=1):
799
- desc_in = gr.Textbox(
800
- label="Describe the bird",
801
- lines=4,
802
- placeholder="Example: Small green bird with red forehead, making repetitive 'tuk-tuk' sound, seen in garden"
803
  )
804
- desc_btn = gr.Button("πŸ” Identify Bird", variant="primary", size="lg")
 
 
 
 
 
 
805
  with gr.Column(scale=2):
806
- desc_out = gr.Markdown()
 
 
 
807
 
808
- desc_btn.click(identify_description, [desc_in], desc_out)
809
 
 
810
  with gr.Tab("ℹ️ About"):
811
  gr.Markdown("""
812
- ## 🐦 BirdSense Pro - Technical Details
813
-
814
- ### No Hardcoding Policy
815
- - **Images**: Dynamically fetched from Wikipedia based on LLM-provided scientific names
816
- - **Species**: LLM has knowledge of 10,000+ bird species, not limited to a fixed list
817
- - **Identification**: Pure AI reasoning, no lookup tables
818
-
819
- ### Models Used
820
- - **Audio**: META SAM-Audio style preprocessing (500-10000 Hz bird call isolation)
821
- - **LLM**: Llama3.2 / Phi4 (local) or Mistral-7B (cloud)
822
- - **Image**: YOLO-inspired color/pattern extraction β†’ LLM reasoning
823
-
824
- ### API Endpoints
825
- - **Local**: Ollama at localhost:11434
826
- - **Cloud**: HuggingFace Inference API (fallback)
827
 
828
  ### CSCR Initiative
829
  Open-source bird identification for researchers in India.
830
  """)
831
 
 
832
  gr.HTML("""
833
- <div style="text-align: center; padding: 1rem; margin-top: 1rem; border-top: 1px solid #334155;">
834
- <p style="color: #4ade80;">🐦 BirdSense Pro - CSCR Initiative</p>
835
- <p style="color: #64748b; font-size: 0.8rem;">Dynamic Images β€’ No Hardcoding β€’ LLM-Powered</p>
836
  </div>
837
  """)
838
 
@@ -840,5 +937,5 @@ Open-source bird identification for researchers in India.
840
  if __name__ == "__main__":
841
  print(f"\n🐦 BirdSense Pro")
842
  print(f"LLM: {get_llm_status()}")
843
- print("Starting server...")
844
  demo.launch(server_name="0.0.0.0", server_port=7860)
 
16
  import scipy.signal as signal
17
  from scipy.ndimage import gaussian_filter1d
18
  from dataclasses import dataclass
19
+ from typing import Optional, Tuple, List, Dict, Generator
20
  import json
21
  import requests
22
  import re
23
  import urllib.parse
24
+ import os
25
 
26
  # ================== CONFIG ==================
27
  SAMPLE_RATE = 48000
 
31
  OLLAMA_MODELS = ["llama3.2", "phi4:latest", "qwen2.5:3b"] # Priority order
32
 
33
  # HuggingFace Inference API (for cloud deployment)
 
 
34
  HF_API_TOKEN = os.environ.get("HF_TOKEN", "")
35
  HF_API_MODELS = [
36
  "mistralai/Mistral-7B-Instruct-v0.3",
37
  "google/flan-t5-xxl",
38
+ "facebook/opt-1.3b",
39
  ]
40
 
41
 
42
+ # ================== CUSTOM CSS ==================
43
+ CUSTOM_CSS = """
44
+ @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&family=JetBrains+Mono:wght@400;500&display=swap');
45
+
46
+ * {
47
+ font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif !important;
48
+ }
49
+
50
+ .gradio-container {
51
+ max-width: 1400px !important;
52
+ margin: 0 auto !important;
53
+ padding: 20px !important;
54
+ }
55
+
56
+ h1, h2, h3, h4, h5, h6 {
57
+ font-family: 'Inter', sans-serif !important;
58
+ font-weight: 600 !important;
59
+ letter-spacing: -0.02em !important;
60
+ }
61
+
62
+ .header-banner {
63
+ background: linear-gradient(135deg, #0f2e1f 0%, #1a4d2e 50%, #0f2e1f 100%) !important;
64
+ padding: 2.5rem !important;
65
+ border-radius: 20px !important;
66
+ margin-bottom: 1.5rem !important;
67
+ box-shadow: 0 10px 40px rgba(0,0,0,0.3) !important;
68
+ }
69
+
70
+ .header-banner h1 {
71
+ color: #4ade80 !important;
72
+ font-size: 3rem !important;
73
+ font-weight: 700 !important;
74
+ margin: 0 0 0.5rem 0 !important;
75
+ text-shadow: 0 2px 10px rgba(74, 222, 128, 0.3) !important;
76
+ }
77
+
78
+ .header-banner p {
79
+ margin: 0.3rem 0 !important;
80
+ line-height: 1.5 !important;
81
+ }
82
+
83
+ .llm-status {
84
+ background: #1e293b !important;
85
+ padding: 12px 20px !important;
86
+ border-radius: 12px !important;
87
+ margin-bottom: 1rem !important;
88
+ font-weight: 500 !important;
89
+ font-size: 1rem !important;
90
+ }
91
+
92
+ .tab-nav button {
93
+ font-size: 1rem !important;
94
+ font-weight: 500 !important;
95
+ padding: 12px 24px !important;
96
+ }
97
+
98
+ .primary-btn {
99
+ background: linear-gradient(135deg, #4ade80 0%, #22c55e 100%) !important;
100
+ color: #0f172a !important;
101
+ font-weight: 600 !important;
102
+ font-size: 1.1rem !important;
103
+ padding: 16px 32px !important;
104
+ border-radius: 12px !important;
105
+ border: none !important;
106
+ cursor: pointer !important;
107
+ transition: all 0.2s ease !important;
108
+ box-shadow: 0 4px 15px rgba(74, 222, 128, 0.3) !important;
109
+ }
110
+
111
+ .primary-btn:hover {
112
+ transform: translateY(-2px) !important;
113
+ box-shadow: 0 6px 20px rgba(74, 222, 128, 0.4) !important;
114
+ }
115
+
116
+ .result-box {
117
+ background: #1e293b !important;
118
+ border-radius: 16px !important;
119
+ padding: 24px !important;
120
+ min-height: 400px !important;
121
+ }
122
+
123
+ textarea, input[type="text"] {
124
+ font-family: 'Inter', sans-serif !important;
125
+ font-size: 1rem !important;
126
+ border-radius: 10px !important;
127
+ }
128
+
129
+ code, pre {
130
+ font-family: 'JetBrains Mono', monospace !important;
131
+ }
132
+
133
+ .footer {
134
+ text-align: center !important;
135
+ padding: 1.5rem !important;
136
+ margin-top: 2rem !important;
137
+ border-top: 1px solid #334155 !important;
138
+ }
139
  """
140
 
141
+
142
+ # ================== DYNAMIC IMAGE SEARCH ==================
143
+
144
  def get_wikipedia_image(bird_name: str, scientific_name: str = "") -> str:
145
+ """Dynamically fetch bird image from Wikipedia. No hardcoding."""
 
 
 
 
146
  search_terms = []
147
  if scientific_name:
148
  search_terms.append(scientific_name.replace(" ", "_"))
 
151
 
152
  for term in search_terms:
153
  try:
 
154
  wiki_url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{urllib.parse.quote(term)}"
155
  resp = requests.get(wiki_url, timeout=5, headers={"User-Agent": "BirdSense/1.0"})
156
 
 
158
  data = resp.json()
159
  if "thumbnail" in data and "source" in data["thumbnail"]:
160
  img_url = data["thumbnail"]["source"]
 
161
  img_url = img_url.replace("/220px-", "/400px-").replace("/320px-", "/400px-")
162
  return img_url
163
  elif "originalimage" in data and "source" in data["originalimage"]:
164
  return data["originalimage"]["source"]
165
+ except:
166
  continue
167
 
168
+ # Fallback: Wikimedia Commons search
169
  try:
170
+ commons_url = "https://commons.wikimedia.org/w/api.php"
171
  params = {
172
+ "action": "query", "format": "json",
173
+ "list": "search", "srsearch": f"{bird_name} bird",
174
+ "srnamespace": "6", "srlimit": "1"
 
 
 
175
  }
176
  resp = requests.get(commons_url, params=params, timeout=5)
177
  if resp.status_code == 200:
178
  data = resp.json()
179
  if data.get("query", {}).get("search"):
180
  file_title = data["query"]["search"][0]["title"]
 
 
181
  file_params = {
182
+ "action": "query", "format": "json",
183
+ "titles": file_title, "prop": "imageinfo",
184
+ "iiprop": "url", "iiurlwidth": "400"
 
 
 
185
  }
186
+ file_resp = requests.get(commons_url, params=file_params, timeout=5)
187
  if file_resp.status_code == 200:
188
+ pages = file_resp.json().get("query", {}).get("pages", {})
 
189
  for page in pages.values():
190
  if "imageinfo" in page:
191
  return page["imageinfo"][0].get("thumburl", page["imageinfo"][0].get("url", ""))
192
  except:
193
  pass
194
 
 
195
  return "https://upload.wikimedia.org/wikipedia/commons/thumb/5/5a/Bird_icon.svg/200px-Bird_icon.svg.png"
196
 
197
 
 
253
  f, t, Zxx = signal.stft(audio, self.sr, nperseg=2048)
254
  magnitude = np.abs(Zxx)
255
 
256
+ bands = [("low", 500, 2000), ("mid", 2000, 5000), ("high", 5000, 10000)]
 
 
 
 
 
257
  detected = []
258
  for band_name, low, high in bands:
259
  band_idx = (f >= low) & (f <= high)
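The band scan above reduces the STFT magnitude to per-band energy. The same idea can be checked on a synthetic tone, where only the band containing the tone lights up; a minimal sketch reusing this file's band edges (the test signal is arbitrary):

```python
import numpy as np
import scipy.signal as signal

sr = 48000
t = np.linspace(0, 1.0, sr, endpoint=False)
audio = np.sin(2 * np.pi * 3000 * t)  # pure 3 kHz tone -> should land in "mid"

f, _, Zxx = signal.stft(audio, sr, nperseg=2048)
magnitude = np.abs(Zxx)

for band_name, low, high in [("low", 500, 2000), ("mid", 2000, 5000), ("high", 5000, 10000)]:
    band_idx = (f >= low) & (f <= high)
    energy = float(magnitude[band_idx].mean())  # rows of Zxx are frequency bins
    print(band_name, round(energy, 4))          # "mid" dominates for a 3 kHz tone
```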
 
284
  return None
285
 
286
 
287
+ def call_ollama_stream(prompt: str, system: str = None) -> Generator[str, None, None]:
288
+ """Call local Ollama LLM with streaming."""
289
+ model = get_available_ollama_model()
290
+ if not model:
291
+ yield "⚠️ Ollama not available"
292
+ return
293
+
294
+ payload = {
295
+ "model": model,
296
+ "prompt": prompt,
297
+ "stream": True,
298
+ "options": {"temperature": 0.2, "num_predict": 2000}
299
+ }
300
+ if system:
301
+ payload["system"] = system
302
+
303
+ try:
304
+ with requests.post(f"{OLLAMA_URL}/api/generate", json=payload, stream=True, timeout=180) as r:
305
+ full_response = ""
306
+ for line in r.iter_lines():
307
+ if line:
308
+ try:
309
+ data = json.loads(line)
310
+ chunk = data.get("response", "")
311
+ full_response += chunk
312
+ yield full_response
313
+ except:
314
+ continue
315
+ except Exception as e:
316
+ yield f"Error: {e}"
317
+
318
+
319
  def call_ollama(prompt: str, system: str = None) -> Optional[str]:
320
+ """Call local Ollama LLM (non-streaming)."""
321
  model = get_available_ollama_model()
322
  if not model:
323
  return None
 
343
  def call_hf_inference(prompt: str, system: str = None) -> Optional[str]:
344
  """Call HuggingFace Inference API."""
345
  full_prompt = f"{system}\n\n{prompt}" if system else prompt
 
 
346
  if len(full_prompt) > 4000:
347
  full_prompt = full_prompt[:4000]
348
 
 
355
 
356
  payload = {
357
  "inputs": full_prompt,
358
+ "parameters": {"max_new_tokens": 1000, "temperature": 0.3, "return_full_text": False}
 
 
 
 
359
  }
360
 
361
  resp = requests.post(url, headers=headers, json=payload, timeout=90)
 
367
  if text and len(text) > 20:
368
  return text
369
  elif isinstance(result, dict):
370
+ return result.get("generated_text", "")
 
 
 
 
 
 
 
371
  except Exception as e:
 
372
  continue
 
373
  return None
374
 
375
 
376
  def call_llm(prompt: str, system: str = None) -> Optional[str]:
377
  """Call LLM - Ollama first, HuggingFace fallback."""
 
378
  result = call_ollama(prompt, system)
379
  if result:
380
  return result
 
 
381
  return call_hf_inference(prompt, system)
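The cloud fallback above posts to the classic HuggingFace Inference API, where text-generation models typically answer with `[{"generated_text": ...}]` and a 503 while the model is still loading. A minimal sketch of the request shape (token and model are placeholders):

```python
import os

import requests

model = "mistralai/Mistral-7B-Instruct-v0.3"  # placeholder; any hosted text-gen model
url = f"https://api-inference.huggingface.co/models/{model}"
headers = {"Authorization": f"Bearer {os.environ.get('HF_TOKEN', '')}"}
payload = {
    "inputs": "Identify a small green bird with a red forehead.",
    "parameters": {"max_new_tokens": 200, "temperature": 0.3, "return_full_text": False},
}

resp = requests.post(url, headers=headers, json=payload, timeout=90)
if resp.status_code == 200:
    result = resp.json()
    print(result[0]["generated_text"] if isinstance(result, list) else result)
elif resp.status_code == 503:
    print("Model is still loading; retry after a short delay.")
```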
382
 
383
 
 
406
  sam_metadata: dict
407
 
408
  def to_prompt(self) -> str:
409
+ freq_desc = "very low (<500Hz, large bird)" if self.peak_frequency < 500 else \
410
  "low (500-1500Hz, koel/coucal)" if self.peak_frequency < 1500 else \
411
+ "medium (1500-4000Hz, songbird)" if self.peak_frequency < 4000 else \
412
+ "high (4000-7000Hz, warbler)" if self.peak_frequency < 7000 else \
413
+ "very high (>7000Hz, alarm call)"
414
 
415
+ return f"""AUDIO ANALYSIS (SAM-Audio processed):
416
+ β€’ Duration: {self.duration:.2f}s
417
+ β€’ Peak frequency: {self.peak_frequency:.0f} Hz ({freq_desc})
418
+ β€’ Frequency range: {self.freq_range[0]:.0f} - {self.freq_range[1]:.0f} Hz
419
+ β€’ Pattern: {"MELODIC" if self.is_melodic else "MONOTONE"}, {"REPETITIVE ({:.1f}/sec)".format(self.syllable_rate) if self.is_repetitive else "VARIABLE"}
420
+ β€’ Syllables: {self.num_syllables}
421
+ β€’ Quality: SNR {self.snr_db:.0f}dB
422
+ β€’ SAM separation: {self.sam_metadata.get('separation_ratio', 0)*100:.0f}%"""
 
 
 
 
423
 
424
 
425
  def extract_features(audio: np.ndarray, sr: int, sam_metadata: dict) -> AudioFeatures:
426
  """Extract comprehensive audio features."""
427
  duration = len(audio) / sr
 
 
428
  freqs, psd = signal.welch(audio, sr, nperseg=min(4096, len(audio)))
429
  peak_freq = freqs[np.argmax(psd)]
430
 
 
431
  total_power = np.sum(psd) + 1e-10
432
  centroid = np.sum(freqs * psd) / total_power
433
  bandwidth = np.sqrt(np.sum(((freqs - centroid) ** 2) * psd) / total_power)
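The two spectral moments computed here are the power-weighted mean frequency (centroid) and the power-weighted standard deviation around it (bandwidth). A self-contained check on a two-tone signal (values chosen only for illustration):

```python
import numpy as np
import scipy.signal as signal

sr = 48000
t = np.linspace(0, 1.0, sr, endpoint=False)
audio = np.sin(2 * np.pi * 2000 * t) + 0.5 * np.sin(2 * np.pi * 6000 * t)

freqs, psd = signal.welch(audio, sr, nperseg=4096)
total_power = np.sum(psd) + 1e-10
centroid = np.sum(freqs * psd) / total_power  # power-weighted mean frequency
bandwidth = np.sqrt(np.sum(((freqs - centroid) ** 2) * psd) / total_power)
print(f"centroid ~{centroid:.0f} Hz, bandwidth ~{bandwidth:.0f} Hz")
# The centroid sits between 2 kHz and 6 kHz, pulled toward the stronger tone.
```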
 
456
  for c in chunks:
457
  if len(c) > 512:
458
  _, cpsd = signal.welch(c, sr, nperseg=min(1024, len(c)))
459
+ if len(cpsd) == len(freqs):
460
+ chunk_freqs.append(freqs[np.argmax(cpsd)])
461
  if chunk_freqs:
462
  is_melodic = np.std(chunk_freqs) / (np.mean(chunk_freqs) + 1e-10) > 0.15
463
 
 
464
  noise = np.percentile(np.abs(audio), 5)
465
  sig = np.percentile(np.abs(audio), 95)
466
  snr = 20 * np.log10((sig + 1e-10) / (noise + 1e-10))
467
 
468
  return AudioFeatures(
469
+ duration=duration, peak_frequency=float(peak_freq),
 
470
  freq_range=(float(freq_low), float(freq_high)),
471
+ num_syllables=num_syl, syllable_rate=float(syl_rate),
472
+ is_melodic=is_melodic, is_repetitive=syl_rate > 3,
473
+ snr_db=float(snr), spectral_centroid=float(centroid),
474
+ spectral_bandwidth=float(bandwidth), sam_metadata=sam_metadata
 
 
 
 
475
  )
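The SNR estimate above treats the 5th percentile of the absolute amplitude as the noise floor and the 95th as the signal level, i.e. 20·log10(p95/p5). A quick numeric check on a synthetic clip where roughly a fifth of the samples are a loud tone burst:

```python
import numpy as np

rng = np.random.default_rng(0)
audio = 0.01 * rng.standard_normal(48000)  # low-level noise floor
audio[10000:20000] += np.sin(2 * np.pi * 3000 * np.arange(10000) / 48000)

p5 = np.percentile(np.abs(audio), 5)    # proxy for the noise floor
p95 = np.percentile(np.abs(audio), 95)  # proxy for the signal level
print(f"SNR ~{20 * np.log10((p95 + 1e-10) / (p5 + 1e-10)):.1f} dB")  # large, ~60 dB
```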
476
 
477
 
 
496
  return audio_data, sr
497
 
498
 
499
+ # ================== IMAGE ANALYSIS ==================
500
 
501
  def analyze_image_features(image: np.ndarray) -> Dict:
502
+ """YOLO-inspired image feature extraction."""
 
 
 
503
  if len(image.shape) != 3 or image.shape[2] < 3:
504
  return {"error": "Invalid image"}
505
 
506
  h, w = image.shape[:2]
 
 
507
  r, g, b = image[:,:,0], image[:,:,1], image[:,:,2]
508
 
 
509
  colors = []
510
  color_regions = []
511
 
512
+ regions = {"upper": image[:h//3, :, :], "middle": image[h//3:2*h//3, :, :], "lower": image[2*h//3:, :, :]}
 
 
 
 
 
513
 
514
  for region_name, region in regions.items():
515
  rr, rg, rb = np.mean(region[:,:,0]), np.mean(region[:,:,1]), np.mean(region[:,:,2])
 
 
516
  region_colors = []
517
+
518
+ if rr > 180 and rg > 180 and rb > 180: region_colors.append("white")
519
+ if rr < 60 and rg < 60 and rb < 60: region_colors.append("black")
520
+ if rg > rr * 1.2 and rg > rb * 1.2: region_colors.append("green")
521
+ if rb > rr * 1.2 and rb > rg * 1.1: region_colors.append("blue")
522
  if rr > rg * 1.3 and rr > rb * 1.3:
523
+ region_colors.append("red" if rr > 200 else "brown")
524
+ if rr > 150 and rg > 100 and rb < 80: region_colors.append("yellow/orange")
525
  if abs(rr - rg) < 30 and abs(rg - rb) < 30:
526
+ region_colors.append("grey/white" if rr > 150 else "grey")
 
 
 
527
 
528
  if region_colors:
529
  color_regions.append(f"{region_name}: {', '.join(region_colors)}")
530
 colors.extend(region_colors)
531
532
  gray = 0.299 * r + 0.587 * g + 0.114 * b
533
  edges = np.abs(np.gradient(gray, axis=0)) + np.abs(np.gradient(gray, axis=1))
534
 pattern_intensity = np.mean(edges)
535
536
  return {
537
+ "colors": list(set(colors)),
538
  "color_regions": color_regions,
539
+ "has_patterns": pattern_intensity > 20,
540
+ "size_hint": "long-tailed" if w/h > 1.5 else "compact" if w/h < 0.7 else "medium"
 
541
  }
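A quick way to sanity-check these thresholds is a synthetic image with known region colors. A sketch assuming `analyze_image_features` from this file is in scope; a green upper third over a brown body should be reported per region:

```python
import numpy as np

img = np.zeros((90, 60, 3), dtype=np.float64)
img[:30] = (40, 180, 40)   # upper third: rg dominates -> "green"
img[30:] = (150, 90, 60)   # rest: rr > 1.3*rg and rr > 1.3*rb, rr <= 200 -> "brown"

features = analyze_image_features(img)
print(features["colors"])         # e.g. ['green', 'brown'] (set order may vary)
print(features["color_regions"])  # per-region breakdown: upper/middle/lower
print(features["size_hint"])      # 60/90 < 0.7 -> "compact"
```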
542
 
543
 
544
  # ================== LLM PROMPTS ==================
545
 
546
+ SYSTEM_PROMPT = """You are an expert ornithologist with encyclopedic knowledge of 10,000+ bird species worldwide, specializing in Indian birds (1,300+ species).
547
 
548
+ CRITICAL REQUIREMENTS:
549
+ 1. Identify birds based ONLY on the provided features
550
+ 2. ALWAYS include scientific names (REQUIRED for image lookup)
551
+ 3. Provide confidence scores (0-100)
552
+ 4. Explain your reasoning
 
553
 
554
+ RESPOND IN VALID JSON:
555
  {
556
  "birds": [
557
  {
558
+ "name": "Common Name",
559
  "scientific_name": "Genus species",
560
  "confidence": 85,
561
+ "reasoning": "Why this bird matches"
562
  }
563
  ],
564
+ "analysis": "Brief overall analysis"
565
+ }"""
 
 
 
566
 
567
 
568
  def parse_llm_response(response: str) -> Tuple[List[Dict], str]:
569
  """Parse LLM JSON response."""
570
+ birds, analysis = [], ""
 
 
571
  if not response:
572
+ return birds, "No response"
573
 
 
574
  try:
 
575
  json_match = re.search(r'\{[\s\S]*\}', response)
576
  if json_match:
577
  data = json.loads(json_match.group())
578
  birds = data.get("birds", [])
579
  analysis = data.get("analysis", "")
580
+ except:
 
581
  pass
 
582
  return birds, analysis
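The greedy `\{[\s\S]*\}` match is what lets the parser tolerate prose before and after the JSON object. A worked example with a chatty reply (the reply text is invented for illustration):

```python
import json
import re

reply = """Sure! Here is my identification:
{
  "birds": [{"name": "Asian Koel", "scientific_name": "Eudynamys scolopaceus",
             "confidence": 82, "reasoning": "Low-frequency repetitive call"}],
  "analysis": "Single melodic caller in the 500-1500 Hz band"
}
Hope that helps!"""

match = re.search(r'\{[\s\S]*\}', reply)  # first "{" through the last "}"
data = json.loads(match.group())
print(data["birds"][0]["scientific_name"])  # Eudynamys scolopaceus
print(data["analysis"])
```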
583
 
584
 
585
  def format_results(birds: List[Dict], analysis: str, extra_info: str = "") -> str:
586
+ """Format results with dynamic images."""
587
+ output = "## 🐦 **Birds Identified**\n\n"
588
 
589
  if analysis:
590
  output += f"*{analysis}*\n\n"
 
591
  if extra_info:
592
+ output += f"πŸ“Š {extra_info}\n\n"
593
 
594
  if not birds:
595
+ return output + "⚠️ **No birds identified.** Please try with clearer audio/image.\n"
 
596
 
597
  for i, bird in enumerate(birds, 1):
598
  name = bird.get("name", "Unknown")
 
600
  conf = bird.get("confidence", 0)
601
  reason = bird.get("reasoning", "")
602
 
 
603
  img_url = get_wikipedia_image(name, scientific)
 
604
  badge = "🟒 HIGH" if conf >= 80 else "🟑 MEDIUM" if conf >= 60 else "πŸ”΄ LOW"
605
 
606
+ output += f"""---
 
607
 
608
+ ### {i}. **{name}** — {conf}% {badge}
609
 
610
  ![{name}]({img_url})
611
 
612
+ **Scientific:** *{scientific}*
613
 
614
+ **Reasoning:** {reason}
615
 
616
  """
 
617
  return output
618
 
619
 
620
+ # ================== MAIN FUNCTIONS (WITH STREAMING) ==================
621
 
622
+ def identify_audio_stream(audio, location: str = "", month: str = "") -> Generator[str, None, None]:
623
+ """Identify bird from audio with streaming."""
624
  if audio is None:
625
+ yield "## ⚠️ Please record or upload audio first"
626
+ return
627
 
628
  status = get_llm_status()
629
+ yield f"## πŸ”„ Processing audio...\n\n**LLM:** {status}"
630
 
631
  try:
632
  sr, audio_data = audio
633
  audio_data, sr = preprocess_audio(audio_data, sr)
634
 
635
+ yield f"## πŸ”„ Applying SAM-Audio preprocessing...\n\n**LLM:** {status}"
636
+
637
  bird_audio, sam_metadata = sam_audio.separate_bird_calls(audio_data)
638
  multi_sources = sam_audio.detect_multiple_birds(bird_audio)
639
 
640
+ yield f"## πŸ”„ Extracting features...\n\nSAM separation: {sam_metadata['separation_ratio']*100:.0f}%\n\n**LLM:** {status}"
641
+
642
  features = extract_features(bird_audio, sr, sam_metadata)
643
 
644
+ prompt = f"""Identify the bird(s) in this recording:
 
645
 
646
  {features.to_prompt()}
647
 
648
  """
649
  if location:
650
+ prompt += f"Location: {location}\n"
651
  if month:
652
+ prompt += f"Month: {month}\n"
 
653
  if len(multi_sources) > 1:
654
+ prompt += f"\nMultiple frequency bands active ({len(multi_sources)}) - possibly multiple birds!\n"
655
 
656
+ prompt += "\nProvide scientific names for all matches."
657
 
658
+ yield f"## πŸ”„ Consulting {status}...\n\n{features.to_prompt()}"
 
659
 
660
+ # Stream the response
661
+ full_response = ""
662
+ for chunk in call_ollama_stream(prompt, SYSTEM_PROMPT):
663
+ full_response = chunk
664
+ yield f"## πŸ”„ LLM thinking...\n\n```\n{chunk[:500]}...\n```"
665
+
666
+ # Parse and format final result
667
+ birds, analysis = parse_llm_response(full_response)
668
+ extra_info = f"**SAM-Audio:** {sam_metadata['separation_ratio']*100:.0f}% | **LLM:** {status}"
669
 
670
  if birds:
671
+ yield format_results(birds, analysis, extra_info)
672
  else:
673
+ yield f"""## ⚠️ Could not identify bird
674
 
675
+ **Audio Features:**
676
  {features.to_prompt()}
677
 
678
+ **LLM Response:**
679
+ ```
680
+ {full_response[:800] if full_response else 'No response'}
681
+ ```
682
 
683
  **Status:** {status}
684
+ """
 
685
 
686
  except Exception as e:
687
+ yield f"## ❌ Error: {str(e)}\n\n**LLM:** {status}"
688
 
689
 
690
+ def identify_image_stream(image) -> Generator[str, None, None]:
691
+ """Identify bird from image with streaming."""
692
  if image is None:
693
+ yield "## ⚠️ Please upload an image first"
694
+ return
695
 
696
  status = get_llm_status()
697
+ yield f"## πŸ”„ Analyzing image...\n\n**LLM:** {status}"
698
 
699
  try:
700
  img = np.array(image) if not isinstance(image, np.ndarray) else image
 
 
701
  features = analyze_image_features(img)
702
 
703
  if "error" in features:
704
+ yield f"## ⚠️ {features['error']}"
705
+ return
706
+
707
+ yield f"## πŸ”„ Colors detected: {', '.join(features['colors'])}\n\n**LLM:** {status}"
708
 
709
+ prompt = f"""Identify the bird based on visual analysis:
 
710
 
711
+ DETECTED FEATURES:
712
+ • Colors: {', '.join(features['colors']) if features['colors'] else 'unclear'}
713
+ • Regions: {'; '.join(features['color_regions'])}
714
+ • Patterns: {'Yes' if features['has_patterns'] else 'No'}
715
+ • Shape: {features['size_hint']}
 
716
 
717
+ Identify Indian bird species that match. Provide scientific names."""
 
 
718
 
719
+ full_response = ""
720
+ for chunk in call_ollama_stream(prompt, SYSTEM_PROMPT):
721
+ full_response = chunk
722
+ yield f"## πŸ”„ LLM analyzing...\n\n```\n{chunk[:500]}...\n```"
723
 
724
+ birds, analysis = parse_llm_response(full_response)
725
+ extra_info = f"**Colors:** {', '.join(features['colors'])} | **LLM:** {status}"
726
 
727
  if birds:
728
+ yield format_results(birds, analysis, extra_info)
729
  else:
730
+ yield f"## ⚠️ Could not identify bird\n\n**Colors:** {', '.join(features['colors'])}\n\n**LLM:** {status}"
 
 
 
 
 
 
 
731
 
732
  except Exception as e:
733
+ yield f"## ❌ Error: {str(e)}\n\n**LLM:** {status}"
734
 
735
 
736
+ def identify_description_stream(description: str) -> Generator[str, None, None]:
737
+ """Identify bird from description with streaming."""
738
  if not description or len(description.strip()) < 5:
739
+ yield "## ⚠️ Please enter a description"
740
+ return
741
 
742
  status = get_llm_status()
743
+ yield f"## πŸ”„ Processing description...\n\n**LLM:** {status}"
744
 
745
  prompt = f"""Identify the bird(s) from this description:
746
 
747
+ "{description}"
 
748
 
749
+ Focus on Indian birds. Provide scientific names."""
 
750
 
751
+ full_response = ""
752
+ for chunk in call_ollama_stream(prompt, SYSTEM_PROMPT):
753
+ full_response = chunk
754
+ yield f"## πŸ”„ LLM thinking...\n\n```\n{chunk[:500]}...\n```"
755
 
756
+ birds, analysis = parse_llm_response(full_response)
757
 
758
  if birds:
759
+ yield format_results(birds, analysis, f"**LLM:** {status}")
760
  else:
761
+ yield f"## ⚠️ Could not identify bird\n\n**LLM:** {status}"
 
 
 
 
762
 
763
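The three `*_stream` handlers work because Gradio treats a generator passed to an event listener as a streaming output: each `yield` replaces the bound component's value, which is what produces the progressive status updates. A minimal standalone sketch of the pattern (`slow_answer` and `demo_stream` are illustrative names; streaming relies on Gradio's queue, enabled by default in recent versions):

```python
import time

import gradio as gr

def slow_answer(question: str):
    # Each yield re-renders the Markdown component with the latest value.
    for step in ("Processing...", "Consulting LLM...", f"Done: you asked '{question}'"):
        yield step
        time.sleep(1)

with gr.Blocks() as demo_stream:
    box = gr.Textbox(label="Question")
    out = gr.Markdown()
    gr.Button("Ask").click(slow_answer, inputs=box, outputs=out)

# demo_stream.launch()
```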
 
764
  # ================== GRADIO UI ==================
765
 
766
+ with gr.Blocks(
767
+ title="🐦 BirdSense Pro",
768
+ css=CUSTOM_CSS
769
+ ) as demo:
770
 
771
+ # Header
772
  gr.HTML("""
773
+ <div class="header-banner">
774
+ <h1>🐦 BirdSense Pro</h1>
775
+ <p style="color: #94a3b8; font-size: 1.2rem; font-weight: 500;">META SAM-Audio + Llama3.2/Phi4 LLM</p>
776
+ <p style="color: #64748b; font-size: 1rem;">Dynamic Wikipedia Images • No Hardcoding • 10,000+ Species</p>
777
  </div>
778
  """)
779
 
780
+ # LLM Status
781
+ gr.HTML(f"""
782
+ <div class="llm-status">
783
+ <strong>Current LLM:</strong> {get_llm_status()}
784
+ </div>
785
+ """)
786
 
787
+ # Tabs
788
  with gr.Tabs():
789
+
790
+ # Audio Tab
791
  with gr.Tab("🎀 Audio Identification"):
792
  gr.Markdown("""
793
+ ### How it works:
794
+ 1. **SAM-Audio** separates bird calls from background noise
795
+ 2. **Features** (frequency, syllables, pattern) are extracted
796
+ 3. **LLM** identifies matching species from 10,000+ birds
797
+ 4. **Images** are fetched dynamically from Wikipedia
798
  """)
799
 
800
  with gr.Row():
801
  with gr.Column(scale=1):
802
+ audio_input = gr.Audio(
803
+ sources=["microphone", "upload"],
804
+ type="numpy",
805
+ label="🎀 Record or Upload Audio"
806
+ )
807
  with gr.Row():
808
+ location_input = gr.Textbox(
809
+ label="πŸ“ Location (optional)",
810
+ placeholder="e.g., Western Ghats, Mumbai"
811
+ )
812
+ month_input = gr.Dropdown(
813
+ label="πŸ“… Month",
814
+ choices=["", "January", "February", "March", "April", "May", "June",
815
+ "July", "August", "September", "October", "November", "December"]
816
+ )
817
+ audio_button = gr.Button(
818
+ "πŸ” Identify Bird",
819
+ variant="primary",
820
+ size="lg",
821
+ elem_classes=["primary-btn"]
822
+ )
823
 
824
  with gr.Column(scale=2):
825
+ audio_output = gr.Markdown(
826
+ value="*Results will appear here after identification...*",
827
+ elem_classes=["result-box"]
828
+ )
829
 
830
+ # Connect button to function
831
+ audio_button.click(
832
+ fn=identify_audio_stream,
833
+ inputs=[audio_input, location_input, month_input],
834
+ outputs=audio_output
835
+ )
836
+
837
+ # Image Tab
838
  with gr.Tab("πŸ“· Image Identification"):
839
  gr.Markdown("""
840
+ ### YOLO-style visual analysis:
841
+ - Extracts **colors** from different regions (head, body, tail)
842
+ - Detects **patterns** and shapes
843
+ - **LLM** matches to bird species
844
  """)
845
 
846
  with gr.Row():
847
  with gr.Column(scale=1):
848
+ image_input = gr.Image(
849
+ sources=["upload", "webcam"],
850
+ type="numpy",
851
+ label="πŸ“· Upload or Capture Image"
852
+ )
853
+ image_button = gr.Button(
854
+ "πŸ” Identify Bird",
855
+ variant="primary",
856
+ size="lg",
857
+ elem_classes=["primary-btn"]
858
+ )
859
+
860
  with gr.Column(scale=2):
861
+ image_output = gr.Markdown(
862
+ value="*Results will appear here...*",
863
+ elem_classes=["result-box"]
864
+ )
865
 
866
+ image_button.click(
867
+ fn=identify_image_stream,
868
+ inputs=[image_input],
869
+ outputs=image_output
870
+ )
871
 
872
+ # Description Tab
873
  with gr.Tab("πŸ“ Description"):
874
+ gr.Markdown("""
875
+ ### Describe the bird you saw:
876
+ Include colors, size, call sounds, behavior, habitat...
877
+ """)
878
+
879
  with gr.Row():
880
  with gr.Column(scale=1):
881
+ desc_input = gr.Textbox(
882
+ label="πŸ“ Bird Description",
883
+ lines=5,
884
+ placeholder="Example: Small green bird with red forehead, making repetitive 'tuk-tuk' sound, seen in garden near fruit trees"
885
  )
886
+ desc_button = gr.Button(
887
+ "πŸ” Identify Bird",
888
+ variant="primary",
889
+ size="lg",
890
+ elem_classes=["primary-btn"]
891
+ )
892
+
893
  with gr.Column(scale=2):
894
+ desc_output = gr.Markdown(
895
+ value="*Results will appear here...*",
896
+ elem_classes=["result-box"]
897
+ )
898
 
899
+ desc_button.click(
900
+ fn=identify_description_stream,
901
+ inputs=[desc_input],
902
+ outputs=desc_output
903
+ )
904
 
905
+ # About Tab
906
  with gr.Tab("ℹ️ About"):
907
  gr.Markdown("""
908
+ ## 🐦 BirdSense Pro
909
+
910
+ ### Key Features
911
+ - **No Hardcoding** — Images fetched dynamically from Wikipedia using scientific names
912
+ - **10,000+ Species** — LLM has knowledge of birds worldwide
913
+ - **SAM-Audio** — Isolates bird calls from background noise
914
+ - **Multi-Modal** — Audio, image, and text identification
915
+
916
+ ### Models
917
+ | Component | Technology |
918
+ |-----------|------------|
919
+ | Audio Preprocessing | META SAM-Audio (500-10000 Hz isolation) |
920
+ | LLM (Local) | Llama3.2 / Phi4 via Ollama |
921
+ | LLM (Cloud) | Mistral-7B via HuggingFace |
922
+ | Image Analysis | YOLO-inspired color/pattern extraction |
923
 
924
  ### CSCR Initiative
925
  Open-source bird identification for researchers in India.
926
  """)
927
 
928
+ # Footer
929
  gr.HTML("""
930
+ <div class="footer">
931
+ <p style="color: #4ade80; font-size: 1.1rem; font-weight: 600;">🐦 BirdSense Pro β€” CSCR Initiative</p>
932
+ <p style="color: #64748b;">Dynamic Images β€’ No Hardcoding β€’ LLM-Powered</p>
933
  </div>
934
  """)
935
 
 
937
  if __name__ == "__main__":
938
  print(f"\n🐦 BirdSense Pro")
939
  print(f"LLM: {get_llm_status()}")
940
+ print("Starting server on http://localhost:7860")
941
  demo.launch(server_name="0.0.0.0", server_port=7860)