sohiyiy committed
Commit b85196b · verified · 1 Parent(s): aaf8e92

Upload folder using huggingface_hub

Files changed (1):
  1. app.py +408 -481
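The commit message refers to `huggingface_hub`'s folder upload. For reference, a minimal sketch of the upload flow (the `repo_id` and `folder_path` below are placeholders, not values taken from this commit):

```python
# Sketch of the upload flow the commit message refers to.
# repo_id and folder_path are hypothetical placeholders.
from huggingface_hub import HfApi

api = HfApi()  # uses the token from `huggingface-cli login` by default
api.upload_folder(
    folder_path="./birdsense",         # local folder to push
    repo_id="sohiyiy/birdsense-pro",   # hypothetical Space id
    repo_type="space",
    commit_message="Upload folder using huggingface_hub",
)
```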
app.py CHANGED
@@ -1,14 +1,17 @@
 """
 🐦 BirdSense Pro - AI Bird Identification
-Uses LLM (via HuggingFace Inference API) for TRUE zero-shot identification
-
-NOT hardcoded - Uses LLM knowledge of 10,000+ bird species!
 
 Features:
-1. Audio → LLM Analysis → Bird ID (zero-shot, any species)
 2. Image → LLM Vision → Bird ID
 3. Description → LLM → Bird ID
 4. Streaming responses
 
 CSCR Initiative
 """
@@ -22,17 +25,19 @@ from typing import Optional, Tuple, Dict, Any, List, Generator
 import json
 import os
 import requests
 
 # ================== CONFIG ==================
 SAMPLE_RATE = 48000
 HF_API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.3"
-# Backup models if primary fails
-BACKUP_MODELS = [
-    "https://api-inference.huggingface.co/models/google/flan-t5-xxl",
-    "https://api-inference.huggingface.co/models/tiiuae/falcon-7b-instruct"
-]
 
-# Bird images for common species (for display)
 BIRD_IMAGES = {
     "Asian Koel": "https://upload.wikimedia.org/wikipedia/commons/thumb/7/78/Eudynamys_scolopaceus_-_Koel_male_-_Sukhna_Lake%2C_India.jpg/320px-Eudynamys_scolopaceus_-_Koel_male_-_Sukhna_Lake%2C_India.jpg",
     "Indian Cuckoo": "https://upload.wikimedia.org/wikipedia/commons/thumb/6/6b/Cuculus_micropterus.jpg/320px-Cuculus_micropterus.jpg",
@@ -49,14 +54,143 @@ BIRD_IMAGES = {
     "Spotted Owlet": "https://upload.wikimedia.org/wikipedia/commons/thumb/9/9d/Spotted_Owlet_%28Athene_brama%29.jpg/320px-Spotted_Owlet_%28Athene_brama%29.jpg",
     "Rose-ringed Parakeet": "https://upload.wikimedia.org/wikipedia/commons/thumb/e/e8/Psittacula_krameri_-_male_-_Fuerteventura.jpg/320px-Psittacula_krameri_-_male_-_Fuerteventura.jpg",
     "Greater Coucal": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/d6/Greater_Coucal_%28Centropus_sinensis%29_in_Hyderabad%2C_AP_W_IMG_7544.jpg/320px-Greater_Coucal_%28Centropus_sinensis%29_in_Hyderabad%2C_AP_W_IMG_7544.jpg",
 }
-
 DEFAULT_IMAGE = "https://upload.wikimedia.org/wikipedia/commons/thumb/4/45/Eopsaltria_australis_-_Mogo_Campground.jpg/320px-Eopsaltria_australis_-_Mogo_Campground.jpg"
 
 
 @dataclass
 class AudioFeatures:
-    """Audio features extracted for LLM analysis."""
     duration: float
     peak_frequency: float
     freq_range: Tuple[float, float]
@@ -69,79 +203,61 @@ class AudioFeatures:
     snr_db: float
 
     def to_description(self) -> str:
-        """Convert features to natural language for LLM."""
-        freq_desc = self._describe_frequency()
-        pattern_desc = self._describe_pattern()
 
-        return f"""Audio recording analysis:
 - Duration: {self.duration:.1f} seconds
 - Dominant frequency: {self.peak_frequency:.0f} Hz ({freq_desc})
 - Frequency range: {self.freq_range[0]:.0f} - {self.freq_range[1]:.0f} Hz
-- Call pattern: {pattern_desc}
-- Syllables detected: {self.num_syllables} (rate: {self.syllable_rate:.1f} per second)
-- Amplitude: {self.amplitude_pattern}
-- Recording quality: SNR {self.snr_db:.0f} dB"""
-
-    def _describe_frequency(self) -> str:
         f = self.peak_frequency
-        if f < 500: return "very low, likely large bird (owl, coucal, peacock)"
-        elif f < 1000: return "low, possibly crow, dove, or large bird"
-        elif f < 2000: return "low-medium, could be cuckoo, myna, or babbler"
-        elif f < 4000: return "medium, typical of most songbirds"
-        elif f < 6000: return "medium-high, warbler or sunbird range"
-        elif f < 8000: return "high, small passerine"
-        else: return "very high, insect-like or alarm call"
-
-    def _describe_pattern(self) -> str:
-        parts = []
-        if self.is_melodic:
-            parts.append("melodic/varied pitch")
-        else:
-            parts.append("monotone/single pitch")
-        if self.is_repetitive:
-            parts.append("repetitive")
-        else:
-            parts.append("variable/non-repetitive")
-        return ", ".join(parts)
 
 
-def extract_audio_features(audio: np.ndarray, sr: int) -> AudioFeatures:
-    """Extract comprehensive audio features."""
     duration = len(audio) / sr
     audio = audio / (np.max(np.abs(audio)) + 1e-8)
 
-    # Spectral analysis
     freqs, psd = signal.welch(audio, sr, nperseg=min(4096, len(audio)))
-    peak_idx = np.argmax(psd)
-    peak_freq = freqs[peak_idx]
-
     cumsum = np.cumsum(psd) / (np.sum(psd) + 1e-10)
     freq_low = freqs[np.searchsorted(cumsum, 0.10)]
     freq_high = freqs[np.searchsorted(cumsum, 0.90)]
 
-    spectral_centroid = np.sum(freqs * psd) / (np.sum(psd) + 1e-10)
-
-    # Envelope analysis
     envelope = np.abs(signal.hilbert(audio))
-    kernel = int(0.02 * sr)
-    if kernel > 0:
-        envelope = gaussian_filter1d(envelope, kernel)
 
-    # Syllable detection
     n_fft, hop = 2048, 512
     _, _, Zxx = signal.stft(audio, sr, nperseg=n_fft, noverlap=n_fft-hop)
     flux = np.sum(np.maximum(0, np.diff(np.abs(Zxx), axis=1)), axis=0)
     if len(flux) > 0:
         flux = flux / (np.max(flux) + 1e-10)
-        threshold = np.mean(flux) + 0.5 * np.std(flux)
-        peaks, _ = signal.find_peaks(flux, height=threshold, distance=max(1, int(0.05*sr/hop)))
-        num_syllables = len(peaks)
-    else:
-        num_syllables = 0
-
-    syllable_rate = num_syllables / duration if duration > 0 else 0
 
-    # Melodic detection
     is_melodic = False
     if len(audio) > sr:
         chunks = np.array_split(audio, min(20, max(5, int(duration*4))))
@@ -153,20 +269,16 @@ def extract_audio_features(audio: np.ndarray, sr: int) -> AudioFeatures:
         if chunk_freqs:
             is_melodic = np.std(chunk_freqs) / (np.mean(chunk_freqs) + 1e-10) > 0.15
 
-    # Repetitive detection
-    is_repetitive = syllable_rate > 3
-
     # Amplitude pattern
     if len(envelope) > 100:
         q = len(envelope) // 4
-        start, end = np.mean(envelope[:q]), np.mean(envelope[-q:])
-        var = np.std(envelope) / (np.mean(envelope) + 1e-10)
-        if var > 0.6: amp_pattern = "varied"
-        elif end > start * 1.3: amp_pattern = "ascending"
-        elif end < start * 0.7: amp_pattern = "descending"
         else: amp_pattern = "steady"
-    else:
-        amp_pattern = "unknown"
 
     # SNR
     noise = np.percentile(np.abs(audio), 5)
@@ -177,196 +289,96 @@ def extract_audio_features(audio: np.ndarray, sr: int) -> AudioFeatures:
         duration=duration,
         peak_frequency=float(peak_freq),
         freq_range=(float(freq_low), float(freq_high)),
-        spectral_centroid=float(spectral_centroid),
-        num_syllables=num_syllables,
-        syllable_rate=float(syllable_rate),
         is_melodic=is_melodic,
-        is_repetitive=is_repetitive,
         amplitude_pattern=amp_pattern,
         snr_db=float(snr)
     )
 
 
-def call_llm(prompt: str, system_prompt: str = None) -> str:
-    """
-    Call HuggingFace Inference API for LLM response.
-    Uses free tier - no API key needed for public models.
-    """
-    headers = {"Content-Type": "application/json"}
-
-    # Format prompt for instruction model
-    if system_prompt:
-        full_prompt = f"<s>[INST] {system_prompt}\n\n{prompt} [/INST]"
     else:
-        full_prompt = f"<s>[INST] {prompt} [/INST]"
-
-    payload = {
-        "inputs": full_prompt,
-        "parameters": {
-            "max_new_tokens": 1000,
-            "temperature": 0.3,
-            "return_full_text": False
-        }
-    }
 
-    # Try primary model
-    try:
-        response = requests.post(HF_API_URL, headers=headers, json=payload, timeout=60)
-        if response.status_code == 200:
-            result = response.json()
-            if isinstance(result, list) and len(result) > 0:
-                return result[0].get("generated_text", "")
-    except Exception as e:
-        print(f"Primary model failed: {e}")
 
-    # Try backup models
-    for backup_url in BACKUP_MODELS:
-        try:
-            response = requests.post(backup_url, headers=headers, json=payload, timeout=60)
-            if response.status_code == 200:
-                result = response.json()
-                if isinstance(result, list) and len(result) > 0:
-                    return result[0].get("generated_text", "")
-        except:
-            continue
 
-    return None
-
-
-def get_bird_image(bird_name: str) -> str:
-    """Get image URL for a bird species."""
-    # Check exact match first
-    if bird_name in BIRD_IMAGES:
-        return BIRD_IMAGES[bird_name]
 
-    # Check partial match
-    bird_lower = bird_name.lower()
-    for known_bird, url in BIRD_IMAGES.items():
-        if known_bird.lower() in bird_lower or bird_lower in known_bird.lower():
-            return url
 
-    return DEFAULT_IMAGE
 
 
 # ================== LLM PROMPTS ==================
 
-AUDIO_SYSTEM_PROMPT = """You are an expert ornithologist specializing in bird identification from audio recordings.
-You have extensive knowledge of 10,000+ bird species worldwide, with particular expertise in Indian birds (1,300+ species).
 
-Your task is to identify bird species from audio feature analysis. Consider:
-1. Frequency characteristics match known bird calls
-2. Call pattern (melodic vs monotone, repetitive vs variable)
-3. Syllable rate and duration
-4. Geographic likelihood if location provided
-5. Seasonal patterns if month provided
 
-IMPORTANT: You must identify ALL birds that could be present in the recording. Many recordings have multiple species calling.
 
-Respond in this EXACT JSON format:
 {
   "birds": [
     {
      "name": "Common Name",
      "scientific_name": "Genus species",
      "confidence": 85,
-     "reasoning": "Why this bird matches the audio features",
-     "call_description": "Description of this bird's typical call"
    }
  ],
-  "analysis": "Overall analysis of the recording",
-  "is_unusual": false,
-  "unusual_reason": null
-}
-
-Include ALL birds with confidence >= 50%. This supports multi-bird detection."""
-
-IMAGE_SYSTEM_PROMPT = """You are an expert ornithologist specializing in bird identification from photographs.
-You have extensive knowledge of 10,000+ bird species worldwide, with particular expertise in Indian birds.
-
-Analyze the image description and identify the bird species. Consider:
-1. Plumage colors and patterns
-2. Bill shape and size
-3. Body proportions
-4. Distinctive field marks
-5. Habitat clues in background
-
-Respond in this EXACT JSON format:
-{
-  "birds": [
-    {
-     "name": "Common Name",
-     "scientific_name": "Genus species",
-     "confidence": 85,
-     "reasoning": "Why this bird matches the visual features",
-     "visual_description": "Key visual identification features"
-    }
-  ],
-  "analysis": "Overall analysis of the image"
 }"""
 
-DESCRIPTION_SYSTEM_PROMPT = """You are an expert ornithologist helping identify birds from verbal descriptions.
-You have knowledge of 10,000+ bird species worldwide, especially Indian birds.
-
-Based on the user's description, identify the most likely bird species. Consider:
-1. Physical features mentioned
-2. Call/song descriptions
-3. Behavior patterns
-4. Habitat information
-5. Geographic context
 
-Respond in this EXACT JSON format:
-{
-  "birds": [
-    {
-     "name": "Common Name",
-     "scientific_name": "Genus species",
-     "confidence": 85,
-     "reasoning": "Why this matches the description",
-     "tips": "Additional ID tips for this species"
-    }
-  ],
-  "analysis": "Overall interpretation of the description"
-}"""
-
-
-def preprocess_audio(audio_data: np.ndarray, sr: int) -> Tuple[np.ndarray, int]:
-    """Preprocess audio for analysis."""
-    if audio_data.dtype == np.int16:
-        audio_data = audio_data.astype(np.float32) / 32768.0
-    elif audio_data.dtype == np.int32:
-        audio_data = audio_data.astype(np.float32) / 2147483648.0
-    else:
-        audio_data = audio_data.astype(np.float32)
-
-    if len(audio_data.shape) > 1:
-        audio_data = np.mean(audio_data, axis=1)
-
-    if sr != SAMPLE_RATE:
-        num_samples = int(len(audio_data) * SAMPLE_RATE / sr)
-        audio_data = signal.resample(audio_data, num_samples)
-        sr = SAMPLE_RATE
-
-    audio_data = audio_data / (np.max(np.abs(audio_data)) + 1e-8)
-
-    # Bandpass filter
-    nyq = sr / 2
-    low, high = 150 / nyq, min(15000 / nyq, 0.99)
-    b, a = signal.butter(4, [low, high], btype='band')
-    audio_data = signal.filtfilt(b, a, audio_data)
-
-    return audio_data, sr
 
 
-def format_bird_results(llm_response: str, source: str = "audio") -> str:
     """Parse LLM response and format with images."""
     try:
-        # Try to extract JSON from response
-        json_start = llm_response.find('{')
-        json_end = llm_response.rfind('}') + 1
-        if json_start >= 0 and json_end > json_start:
-            data = json.loads(llm_response[json_start:json_end])
         else:
-            raise ValueError("No JSON found")
 
         birds = data.get("birds", [])
         analysis = data.get("analysis", "")
@@ -374,22 +386,19 @@ def format_bird_results(llm_response: str, source: str = "audio") -> str:
         if not birds:
             return f"### ❌ No birds identified\n\n{analysis}"
 
-        output = "## 🐦 Birds Identified by AI\n\n"
-        output += f"*Analysis: {analysis}*\n\n"
 
         for i, bird in enumerate(birds, 1):
            name = bird.get("name", "Unknown")
            scientific = bird.get("scientific_name", "")
-           confidence = bird.get("confidence", 0)
-           reasoning = bird.get("reasoning", "")
 
-           # Get image
-           image_url = get_bird_image(name)
 
-           # Confidence badge
-           if confidence >= 80:
               badge = "🟢 HIGH"
-           elif confidence >= 60:
               badge = "🟡 MEDIUM"
            else:
               badge = "🔴 LOW"
@@ -397,194 +406,154 @@ def format_bird_results(llm_response: str, source: str = "audio") -> str:
            output += f"""
 ---
 
-### {i}. **{name}** ({confidence}%) {badge}
 
-![{name}]({image_url})
 
 **Scientific Name:** _{scientific}_
 
-**Why this bird:** {reasoning}
 
 """
-           # Add call description for audio
-           if source == "audio" and "call_description" in bird:
-               output += f"**Typical Call:** {bird['call_description']}\n\n"
-
-           # Add visual description for image
-           if source == "image" and "visual_description" in bird:
-               output += f"**Visual ID:** {bird['visual_description']}\n\n"
-
-           # Add tips for description
-           if source == "description" and "tips" in bird:
-               output += f"**ID Tips:** {bird['tips']}\n\n"
-
-        # Check for unusual sighting
-        if data.get("is_unusual"):
-            output += f"\n\n⚠️ **Unusual Sighting:** {data.get('unusual_reason', 'Rare or unexpected species')}\n"
 
        return output
 
-    except Exception as e:
-        # If parsing fails, return raw response
-        return f"### 🤖 AI Analysis\n\n{llm_response}\n\n*(Note: Could not parse structured response)*"
 
432
  # ================== IDENTIFICATION FUNCTIONS ==================
433
 
434
- def identify_from_audio_stream(audio, location: str = "", month: str = ""):
435
- """
436
- Stream bird identification from audio using LLM.
437
- This is the REAL zero-shot identification using LLM knowledge.
438
- """
439
  if audio is None:
440
- yield "### ⚠️ Please record or upload bird audio first!"
441
- return
442
 
443
- yield "### πŸ”„ Processing audio..."
 
444
 
445
  try:
446
  sr, audio_data = audio
447
  audio_data, sr = preprocess_audio(audio_data, sr)
448
 
449
- yield "### πŸ”„ Extracting audio features..."
450
- features = extract_audio_features(audio_data, sr)
451
-
452
- yield f"### πŸ”„ Analyzing with AI...\n\n**Features detected:**\n{features.to_description()}"
453
 
454
- # Build prompt with features
455
- prompt = f"""Identify the bird(s) in this recording based on these audio features:
456
 
457
  {features.to_description()}
458
-
459
  """
460
  if location:
461
- prompt += f"Location: {location}\n"
462
  if month:
463
- prompt += f"Month: {month}\n"
464
 
465
- prompt += "\nIdentify ALL birds that could be making these sounds. Include any bird with confidence >= 50%."
466
 
467
- yield "### πŸ”„ Consulting AI ornithologist (this may take 30-60 seconds)..."
468
 
469
- # Call LLM
470
- response = call_llm(prompt, AUDIO_SYSTEM_PROMPT)
471
 
472
  if response:
473
- result = format_bird_results(response, "audio")
474
- result += f"\n\n---\n\n### πŸ“Š Audio Features\n{features.to_description()}"
 
475
  yield result
476
  else:
477
- yield """### ⚠️ AI service temporarily unavailable
478
 
479
- The HuggingFace Inference API is currently busy. This is normal for free tier usage.
480
-
481
- **What you can try:**
482
- 1. Wait 30 seconds and try again
483
- 2. Try the Description tab (often faster)
484
- 3. Use a shorter audio clip
485
 
486
  **Your audio features:**
487
- """ + features.to_description()
 
 
 
 
 
 
488
 
489
  except Exception as e:
490
- yield f"### ❌ Error: {str(e)}\n\nPlease try again with a different recording."
491
 
492
 
493
- def identify_from_description_stream(description: str):
494
- """Stream bird identification from description using LLM."""
495
  if not description or len(description.strip()) < 5:
496
- yield "### ⚠️ Please enter a description (at least 5 characters)"
497
- return
498
 
499
- yield "### πŸ”„ Analyzing description with AI..."
 
500
 
501
  prompt = f"""Identify the bird(s) based on this description:
502
 
503
  {description}
504
 
505
- If multiple birds could match, list all with confidence >= 50%."""
506
-
507
- yield "### πŸ”„ Consulting AI ornithologist..."
508
 
509
- response = call_llm(prompt, DESCRIPTION_SYSTEM_PROMPT)
510
 
511
  if response:
512
- yield format_bird_results(response, "description")
 
 
513
  else:
514
- yield """### ⚠️ AI service temporarily unavailable
515
 
516
- Please try again in 30 seconds.
517
 
518
- **Tips for description:**
519
- - Mention colors (black, white, red, blue, green)
520
- - Describe the call (whistle, screech, chatter)
521
- - Note size (sparrow-sized, crow-sized)
522
- - Include habitat (garden, forest, water)
523
- - Add behavior (hops, flies in groups, perches high)"""
524
 
525
 
526
- def identify_from_image_stream(image):
527
- """Stream bird identification from image using LLM."""
528
  if image is None:
529
- yield "### ⚠️ Please upload or capture a bird image"
530
- return
531
 
532
- yield "### πŸ”„ Analyzing image..."
 
533
 
534
  try:
535
  if hasattr(image, 'numpy'):
536
- img_array = image.numpy()
537
  else:
538
- img_array = np.array(image)
539
 
540
- # Extract color information
541
  colors = []
542
- if len(img_array.shape) == 3 and img_array.shape[2] >= 3:
543
- avg_r = np.mean(img_array[:, :, 0])
544
- avg_g = np.mean(img_array[:, :, 1])
545
- avg_b = np.mean(img_array[:, :, 2])
546
-
547
- if avg_g > avg_r * 1.1 and avg_g > avg_b * 1.1:
548
- colors.append("green")
549
- if avg_b > avg_r * 1.1 and avg_b > avg_g:
550
- colors.append("blue")
551
- if avg_r > avg_g * 1.2 and avg_r > avg_b * 1.2:
552
- colors.append("red or brown")
553
- if avg_r > 180 and avg_g > 180 and avg_b > 180:
554
- colors.append("white")
555
- if avg_r < 80 and avg_g < 80 and avg_b < 80:
556
- colors.append("black")
557
- if avg_r > 150 and avg_g > 120 and avg_b < 100:
558
- colors.append("yellow or golden")
559
- if avg_r > 100 and avg_g > 80 and avg_b > 60 and avg_r < 180:
560
- colors.append("brown")
561
 
562
- color_desc = ", ".join(colors) if colors else "mixed colors"
563
 
564
- yield f"### πŸ”„ Detected colors: {color_desc}\n\nConsulting AI ornithologist..."
565
-
566
- prompt = f"""Identify the bird in this image.
567
 
 
 
568
  Detected dominant colors: {color_desc}
569
- Image dimensions: {img_array.shape[1]}x{img_array.shape[0]} pixels
570
 
571
- Based on the color analysis, what Indian bird species could this be?
572
- Consider common birds with these colors in their plumage."""
573
 
574
- response = call_llm(prompt, IMAGE_SYSTEM_PROMPT)
575
 
576
  if response:
577
- yield format_bird_results(response, "image")
 
 
 
578
  else:
579
- yield f"""### ⚠️ AI service temporarily unavailable
580
-
581
- **Detected colors:** {color_desc}
582
-
583
- Try the Description tab and describe:
584
- - The exact colors you see
585
- - Bill shape and color
586
- - Body size
587
- - Any distinctive markings"""
588
 
589
  except Exception as e:
590
  yield f"### ❌ Error: {str(e)}"
@@ -592,185 +561,143 @@ Try the Description tab and describe:
 
 # ================== GRADIO UI ==================
 
-with gr.Blocks(title="🐦 BirdSense Pro - AI Bird ID") as demo:
 
     gr.HTML("""
     <div style="text-align: center; background: linear-gradient(135deg, #1a4d2e 0%, #2d5a3e 50%, #1a4d2e 100%); padding: 2rem; border-radius: 16px; margin-bottom: 1.5rem;">
        <h1 style="color: #4ade80; font-size: 2.5rem; margin: 0;">🐦 BirdSense Pro</h1>
-       <p style="color: #94a3b8; font-size: 1.2rem;">AI-Powered Bird Identification</p>
        <p style="color: #64748b; font-size: 0.9rem;">
-           🤖 Uses LLM knowledge of <b>10,000+ species</b> • NOT hardcoded!
-       </p>
-       <p style="color: #475569; font-size: 0.8rem;">
-           Audio • Image • Description | Multi-bird detection | Streaming responses
        </p>
    </div>
    """)
 
    with gr.Tabs():
-        # === AUDIO TAB ===
-        with gr.Tab("🎤 Audio (LLM Analysis)"):
            gr.Markdown("""
-            ### 🎤 Record or upload bird audio
-
-            **How it works:**
-            1. We extract audio features (frequency, pattern, syllables)
-            2. These features are sent to an AI (LLM) that knows 10,000+ bird species
-            3. The AI identifies ALL matching birds (multi-bird detection)
-
-            *This is TRUE zero-shot identification - not hardcoded!*
            """)
 
            with gr.Row():
                with gr.Column(scale=1):
-                    audio_input = gr.Audio(
-                        sources=["microphone", "upload"],
-                        type="numpy",
-                        label="🎤 Bird Audio"
-                    )
                    with gr.Row():
-                        location = gr.Textbox(label="📍 Location (optional)", placeholder="e.g., Western Ghats, Kerala")
-                        month = gr.Dropdown(
-                            label="📅 Month (optional)",
-                            choices=["", "January", "February", "March", "April", "May", "June",
-                                     "July", "August", "September", "October", "November", "December"]
                        )
-                    audio_btn = gr.Button("🔍 Identify Birds with AI", variant="primary", size="lg")
 
                with gr.Column(scale=2):
-                    audio_output = gr.Markdown(label="AI Results (streaming)")
 
-            audio_btn.click(
-                fn=identify_from_audio_stream,
-                inputs=[audio_input, location, month],
-                outputs=[audio_output]
-            )
 
649
- # === IMAGE TAB ===
650
- with gr.Tab("πŸ“· Image (LLM Analysis)"):
651
  gr.Markdown("""
652
- ### πŸ“· Upload or capture a bird image
653
-
654
- **How it works:**
655
- 1. We analyze colors and patterns in the image
656
- 2. This information is sent to an AI for identification
657
- 3. The AI uses its knowledge of bird plumage to identify species
658
  """)
659
 
660
  with gr.Row():
661
  with gr.Column(scale=1):
662
- image_input = gr.Image(
663
- sources=["upload", "webcam"],
664
- type="numpy",
665
- label="πŸ“· Bird Image"
666
  )
667
- image_btn = gr.Button("πŸ” Identify Bird with AI", variant="primary", size="lg")
668
 
669
  with gr.Column(scale=2):
670
- image_output = gr.Markdown(label="AI Results")
671
 
672
- image_btn.click(
673
- fn=identify_from_image_stream,
674
- inputs=[image_input],
675
- outputs=[image_output]
676
- )
677
 
678
- # === DESCRIPTION TAB ===
679
- with gr.Tab("πŸ“ Description (LLM Analysis)"):
680
  gr.Markdown("""
681
- ### πŸ“ Describe the bird you saw or heard
682
-
683
- **This is the most reliable method!** The AI can understand natural language descriptions.
684
-
685
- Describe: colors, size, call/song, behavior, habitat, location
686
  """)
687
 
688
  with gr.Row():
689
  with gr.Column(scale=1):
690
- desc_input = gr.Textbox(
691
- label="Bird Description",
692
- placeholder="""Example descriptions:
693
-
694
- "Small green bird with red forehead, making a repetitive tuk-tuk sound like a hammer"
695
-
696
- "Black and white bird with a beautiful melodious song, often seen in gardens at dawn"
697
-
698
- "Large brown bird with chattering call, always in groups of 6-7"
699
-
700
- "Bright blue bird with orange breast, sitting near water"
701
- """,
702
- lines=6
703
- )
704
- desc_btn = gr.Button("πŸ” Identify Bird with AI", variant="primary", size="lg")
705
 
706
  with gr.Column(scale=2):
707
- desc_output = gr.Markdown(label="AI Results")
708
 
709
- desc_btn.click(
710
- fn=identify_from_description_stream,
711
- inputs=[desc_input],
712
- outputs=[desc_output]
713
- )
714
 
715
- # === HOW IT WORKS TAB ===
716
- with gr.Tab("ℹ️ How It Works"):
717
- gr.Markdown("""
718
- ## 🧠 How BirdSense Pro Works
719
-
720
- ### NOT Hardcoded!
721
-
722
- Unlike simple rule-based systems, BirdSense Pro uses a **Large Language Model (LLM)**
723
- that has learned about birds from millions of documents, scientific papers, and bird guides.
724
-
725
- The LLM knows:
726
- - **10,000+ bird species** worldwide
727
- - **1,300+ Indian bird species** in detail
728
- - Bird calls, songs, and vocalizations
729
- - Plumage patterns and colors
730
- - Habitat preferences
731
- - Seasonal patterns
732
- - Geographic distributions
733
-
734
- ### Pipeline
735
-
736
- ```
737
- Audio Recording
738
- ↓
739
- Feature Extraction (frequency, pattern, syllables)
740
- ↓
741
- Natural Language Description of Features
742
- ↓
743
- LLM Analysis (Mistral-7B via HuggingFace)
744
- ↓
745
- Bird Identification with Confidence Scores
746
- ```
747
-
748
- ### Multi-Bird Detection
749
-
750
- If your recording has multiple species calling, the AI will identify ALL of them!
751
-
752
- ### Limitations
753
-
754
- - Depends on HuggingFace Inference API (free tier has rate limits)
755
- - May take 30-60 seconds for response
756
- - Image analysis is based on color extraction + LLM (not a vision model)
757
-
758
- ### For Best Results
759
-
760
- 1. **Audio:** Clear recordings with minimal background noise
761
- 2. **Image:** Good lighting, bird clearly visible
762
- 3. **Description:** Be specific about colors, calls, and behavior
763
  """)
764
 
765
  gr.HTML("""
766
  <div style="text-align: center; padding: 1rem; margin-top: 1rem; border-top: 1px solid #334155;">
767
  <p style="color: #4ade80; font-weight: bold;">🐦 BirdSense Pro - CSCR Initiative</p>
768
- <p style="color: #94a3b8;">Powered by LLM (10,000+ species) β€’ NOT hardcoded</p>
769
  <p style="color: #64748b;">
770
- <a href="https://github.com/sohamzycus/eagv2/tree/master/birdsense" style="color: #4ade80;">GitHub</a>
771
  </p>
772
  </div>
773
  """)
774
 
 
775
  if __name__ == "__main__":
 
 
 
776
  demo.launch(server_name="0.0.0.0", server_port=7860)
 
app.py (new version, after this commit):

 """
 🐦 BirdSense Pro - AI Bird Identification
+Uses LOCAL Ollama LLM for TRUE zero-shot identification
 
+Supports:
+- Ollama (local) - PRIMARY (fast, no limits)
+- HuggingFace API - FALLBACK (for cloud deployment)
 
 Features:
+1. Audio → LLM Analysis → Bird ID (zero-shot, 10,000+ species)
 2. Image → LLM Vision → Bird ID
 3. Description → LLM → Bird ID
 4. Streaming responses
+5. Multi-bird detection
 
 CSCR Initiative
 """
 ...
 import json
 import os
 import requests
+import time
 
 # ================== CONFIG ==================
 SAMPLE_RATE = 48000
+
+# Ollama configuration (LOCAL - primary)
+OLLAMA_URL = "http://localhost:11434"
+OLLAMA_MODEL = "qwen2.5:3b"  # Fast, good for bird ID
+
+# HuggingFace API (FALLBACK - for cloud deployment)
 HF_API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.3"
 
+# Bird images
 BIRD_IMAGES = {
     "Asian Koel": "https://upload.wikimedia.org/wikipedia/commons/thumb/7/78/Eudynamys_scolopaceus_-_Koel_male_-_Sukhna_Lake%2C_India.jpg/320px-Eudynamys_scolopaceus_-_Koel_male_-_Sukhna_Lake%2C_India.jpg",
     "Indian Cuckoo": "https://upload.wikimedia.org/wikipedia/commons/thumb/6/6b/Cuculus_micropterus.jpg/320px-Cuculus_micropterus.jpg",
     ...
     "Spotted Owlet": "https://upload.wikimedia.org/wikipedia/commons/thumb/9/9d/Spotted_Owlet_%28Athene_brama%29.jpg/320px-Spotted_Owlet_%28Athene_brama%29.jpg",
     "Rose-ringed Parakeet": "https://upload.wikimedia.org/wikipedia/commons/thumb/e/e8/Psittacula_krameri_-_male_-_Fuerteventura.jpg/320px-Psittacula_krameri_-_male_-_Fuerteventura.jpg",
     "Greater Coucal": "https://upload.wikimedia.org/wikipedia/commons/thumb/d/d6/Greater_Coucal_%28Centropus_sinensis%29_in_Hyderabad%2C_AP_W_IMG_7544.jpg/320px-Greater_Coucal_%28Centropus_sinensis%29_in_Hyderabad%2C_AP_W_IMG_7544.jpg",
+    "Common Tailorbird": "https://upload.wikimedia.org/wikipedia/commons/thumb/e/ea/Common_Tailorbird_%28Orthotomus_sutorius%29_in_Kolkata_I_IMG_2859.jpg/320px-Common_Tailorbird_%28Orthotomus_sutorius%29_in_Kolkata_I_IMG_2859.jpg",
+    "Green Bee-eater": "https://upload.wikimedia.org/wikipedia/commons/thumb/b/b1/Merops_orientalis_%28Pune%2C_India%29.jpg/320px-Merops_orientalis_%28Pune%2C_India%29.jpg",
+    "Common Hawk-Cuckoo": "https://upload.wikimedia.org/wikipedia/commons/thumb/0/08/Hierococcyx_varius.jpg/320px-Hierococcyx_varius.jpg",
+    "Indian Robin": "https://upload.wikimedia.org/wikipedia/commons/thumb/6/6e/Indian_Robin_%28Saxicoloides_fulicatus%29_Male.jpg/320px-Indian_Robin_%28Saxicoloides_fulicatus%29_Male.jpg",
+    "Grey Francolin": "https://upload.wikimedia.org/wikipedia/commons/thumb/8/8c/Grey_francolin_%28Francolinus_pondicerianus%29.jpg/320px-Grey_francolin_%28Francolinus_pondicerianus%29.jpg",
 }
 DEFAULT_IMAGE = "https://upload.wikimedia.org/wikipedia/commons/thumb/4/45/Eopsaltria_australis_-_Mogo_Campground.jpg/320px-Eopsaltria_australis_-_Mogo_Campground.jpg"
 
+# ================== OLLAMA CLIENT ==================
+
+class OllamaClient:
+    """Client for local Ollama LLM."""
+
+    def __init__(self, base_url: str = OLLAMA_URL, model: str = OLLAMA_MODEL):
+        self.base_url = base_url
+        self.model = model
+        self._available = None
+
+    def is_available(self) -> bool:
+        """Check if Ollama is running."""
+        if self._available is not None:
+            return self._available
+        try:
+            resp = requests.get(f"{self.base_url}/api/tags", timeout=2)
+            self._available = resp.status_code == 200
+            return self._available
+        except:
+            self._available = False
+            return False
+
+    def generate(self, prompt: str, system: str = None, stream: bool = False) -> str:
+        """Generate response from Ollama."""
+        payload = {
+            "model": self.model,
+            "prompt": prompt,
+            "stream": stream,
+            "options": {
+                "temperature": 0.3,
+                "num_predict": 1500
+            }
+        }
+
+        if system:
+            payload["system"] = system
+
+        try:
+            if stream:
+                return self._generate_stream(payload)
+            else:
+                resp = requests.post(
+                    f"{self.base_url}/api/generate",
+                    json=payload,
+                    timeout=120
+                )
+                if resp.status_code == 200:
+                    return resp.json().get("response", "")
+                return None
+        except Exception as e:
+            print(f"Ollama error: {e}")
+            return None
+
+    def _generate_stream(self, payload) -> Generator[str, None, None]:
+        """Stream response from Ollama."""
+        try:
+            with requests.post(
+                f"{self.base_url}/api/generate",
+                json=payload,
+                stream=True,
+                timeout=120
+            ) as resp:
+                for line in resp.iter_lines():
+                    if line:
+                        data = json.loads(line)
+                        if "response" in data:
+                            yield data["response"]
+                        if data.get("done"):
+                            break
+        except Exception as e:
+            yield f"Error: {e}"
+
+
+# Global Ollama client
+ollama = OllamaClient()
+
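The `/api/tags` and `/api/generate` endpoints used above are Ollama's standard REST API. A quick smoke test of the new client (a sketch; it assumes `ollama serve` is running on `localhost:11434` and the model named in `OLLAMA_MODEL` has been pulled):

```python
# Sketch: smoke test for OllamaClient (assumes a local Ollama server
# with the OLLAMA_MODEL model already pulled).
client = OllamaClient()
if client.is_available():
    reply = client.generate("Name three birds common in Indian gardens.")
    print(reply)
else:
    print("Ollama is not reachable - start it with `ollama serve`.")
```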
+def call_llm(prompt: str, system: str = None, stream: bool = False):
+    """
+    Call LLM - tries Ollama first (local), falls back to HuggingFace API.
+    """
+    # Try Ollama first (local, fast)
+    if ollama.is_available():
+        result = ollama.generate(prompt, system, stream=stream)
+        if result:
+            return result
+
+    # Fallback to HuggingFace API
+    try:
+        headers = {"Content-Type": "application/json"}
+        if system:
+            full_prompt = f"<s>[INST] {system}\n\n{prompt} [/INST]"
+        else:
+            full_prompt = f"<s>[INST] {prompt} [/INST]"
+
+        payload = {
+            "inputs": full_prompt,
+            "parameters": {
+                "max_new_tokens": 1500,
+                "temperature": 0.3,
+                "return_full_text": False
+            }
+        }
+
+        resp = requests.post(HF_API_URL, headers=headers, json=payload, timeout=90)
+        if resp.status_code == 200:
+            result = resp.json()
+            if isinstance(result, list) and len(result) > 0:
+                return result[0].get("generated_text", "")
+    except Exception as e:
+        print(f"HuggingFace API error: {e}")
+
+    return None
+
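Callers never need to know which backend answered. A minimal usage sketch (the question text is illustrative only):

```python
# Sketch: call_llm hides the Ollama-vs-HuggingFace choice from the caller.
answer = call_llm(
    "A koel-like call at dawn, rising in pitch - which species fits?",
    system="You are an expert ornithologist. Answer in one sentence.",
)
print(answer if answer else "No LLM backend responded.")
```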
+def get_llm_status() -> str:
+    """Get current LLM status."""
+    if ollama.is_available():
+        return f"🟢 Ollama ({OLLAMA_MODEL}) - LOCAL"
+    else:
+        return "🟡 HuggingFace API - CLOUD (slower)"
+
+
+# ================== AUDIO FEATURES ==================
+
 @dataclass
 class AudioFeatures:
+    """Audio features for LLM analysis."""
     duration: float
     peak_frequency: float
     freq_range: Tuple[float, float]
     ...
     snr_db: float
 
     def to_description(self) -> str:
+        """Convert to natural language for LLM."""
+        freq_desc = self._describe_freq()
 
+        return f"""Audio analysis results:
 - Duration: {self.duration:.1f} seconds
 - Dominant frequency: {self.peak_frequency:.0f} Hz ({freq_desc})
 - Frequency range: {self.freq_range[0]:.0f} - {self.freq_range[1]:.0f} Hz
+- Call pattern: {"melodic" if self.is_melodic else "monotone"}, {"repetitive" if self.is_repetitive else "variable"}
+- Syllables: {self.num_syllables} detected ({self.syllable_rate:.1f}/second)
+- Amplitude pattern: {self.amplitude_pattern}
+- Recording quality: SNR {self.snr_db:.0f} dB ({"good" if self.snr_db > 15 else "fair" if self.snr_db > 8 else "poor"})"""
+
+    def _describe_freq(self) -> str:
         f = self.peak_frequency
+        if f < 500: return "very low - large bird like coucal, peacock, owl"
+        elif f < 1000: return "low - crow, dove, large bird"
+        elif f < 2000: return "low-medium - cuckoo, myna, babbler"
+        elif f < 4000: return "medium - most songbirds, bulbul, robin"
+        elif f < 6000: return "medium-high - warbler, tailorbird"
+        elif f < 8000: return "high - sunbird, small passerine"
+        else: return "very high - alarm call or insect-like"
 
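What the LLM actually receives is the rendered `to_description()` text. An illustrative instance (all values invented, not from a real recording, and assuming the field set visible in the `AudioFeatures(...)` constructor call further down):

```python
# Illustrative AudioFeatures instance - values are made up, not measured.
demo_features = AudioFeatures(
    duration=3.2, peak_frequency=1800.0, freq_range=(900.0, 2600.0),
    spectral_centroid=1750.0, num_syllables=12, syllable_rate=3.8,
    is_melodic=True, is_repetitive=True, amplitude_pattern="steady",
    snr_db=18.0,
)
print(demo_features.to_description())  # lands in the "low-medium - cuckoo, myna, babbler" band
```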
+def extract_features(audio: np.ndarray, sr: int) -> AudioFeatures:
+    """Extract audio features."""
     duration = len(audio) / sr
     audio = audio / (np.max(np.abs(audio)) + 1e-8)
 
+    # Spectral
     freqs, psd = signal.welch(audio, sr, nperseg=min(4096, len(audio)))
+    peak_freq = freqs[np.argmax(psd)]
     cumsum = np.cumsum(psd) / (np.sum(psd) + 1e-10)
     freq_low = freqs[np.searchsorted(cumsum, 0.10)]
     freq_high = freqs[np.searchsorted(cumsum, 0.90)]
+    centroid = np.sum(freqs * psd) / (np.sum(psd) + 1e-10)
 
+    # Envelope
     envelope = np.abs(signal.hilbert(audio))
+    k = int(0.02 * sr)
+    if k > 0:
+        envelope = gaussian_filter1d(envelope, k)
 
+    # Syllables
     n_fft, hop = 2048, 512
     _, _, Zxx = signal.stft(audio, sr, nperseg=n_fft, noverlap=n_fft-hop)
     flux = np.sum(np.maximum(0, np.diff(np.abs(Zxx), axis=1)), axis=0)
+    num_syl = 0
     if len(flux) > 0:
         flux = flux / (np.max(flux) + 1e-10)
+        th = np.mean(flux) + 0.5 * np.std(flux)
+        peaks, _ = signal.find_peaks(flux, height=th, distance=max(1, int(0.05*sr/hop)))
+        num_syl = len(peaks)
+    syl_rate = num_syl / duration if duration > 0 else 0
 
+    # Melodic
     is_melodic = False
     if len(audio) > sr:
         chunks = np.array_split(audio, min(20, max(5, int(duration*4))))
         ...
         if chunk_freqs:
             is_melodic = np.std(chunk_freqs) / (np.mean(chunk_freqs) + 1e-10) > 0.15
 
     # Amplitude pattern
+    amp_pattern = "unknown"
     if len(envelope) > 100:
         q = len(envelope) // 4
+        s, e = np.mean(envelope[:q]), np.mean(envelope[-q:])
+        v = np.std(envelope) / (np.mean(envelope) + 1e-10)
+        if v > 0.6: amp_pattern = "varied"
+        elif e > s * 1.3: amp_pattern = "ascending"
+        elif e < s * 0.7: amp_pattern = "descending"
         else: amp_pattern = "steady"
 
     # SNR
     noise = np.percentile(np.abs(audio), 5)
     ...
         duration=duration,
         peak_frequency=float(peak_freq),
         freq_range=(float(freq_low), float(freq_high)),
+        spectral_centroid=float(centroid),
+        num_syllables=num_syl,
+        syllable_rate=float(syl_rate),
         is_melodic=is_melodic,
+        is_repetitive=syl_rate > 3,
         amplitude_pattern=amp_pattern,
         snr_db=float(snr)
     )
 
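A synthetic sanity check of the renamed extractor (a sketch: a pure 2 kHz tone, not a bird, just to confirm the pipeline runs; it assumes the context lines elided from `extract_features` above are present in the full file):

```python
# Sketch: run the feature extractor on a synthetic 2 kHz tone.
import numpy as np
t = np.linspace(0, 2.0, 2 * SAMPLE_RATE, endpoint=False)
tone = (0.5 * np.sin(2 * np.pi * 2000 * t)).astype(np.float32)
feats = extract_features(tone, SAMPLE_RATE)
print(feats.to_description())  # dominant frequency should land near 2000 Hz
```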
+ def preprocess_audio(audio_data: np.ndarray, sr: int) -> Tuple[np.ndarray, int]:
303
+ """Preprocess audio."""
304
+ if audio_data.dtype == np.int16:
305
+ audio_data = audio_data.astype(np.float32) / 32768.0
306
+ elif audio_data.dtype == np.int32:
307
+ audio_data = audio_data.astype(np.float32) / 2147483648.0
 
 
 
 
308
  else:
309
+ audio_data = audio_data.astype(np.float32)
 
 
 
 
 
 
 
 
 
310
 
311
+ if len(audio_data.shape) > 1:
312
+ audio_data = np.mean(audio_data, axis=1)
 
 
 
 
 
 
 
313
 
314
+ if sr != SAMPLE_RATE:
315
+ num = int(len(audio_data) * SAMPLE_RATE / sr)
316
+ audio_data = signal.resample(audio_data, num)
317
+ sr = SAMPLE_RATE
 
 
 
 
 
 
318
 
319
+ audio_data = audio_data / (np.max(np.abs(audio_data)) + 1e-8)
 
 
 
 
 
 
 
320
 
321
+ # Bandpass
322
+ nyq = sr / 2
323
+ low, high = 150 / nyq, min(15000 / nyq, 0.99)
324
+ b, a = signal.butter(4, [low, high], btype='band')
325
+ audio_data = signal.filtfilt(b, a, audio_data)
326
 
327
+ return audio_data, sr
328
 
329
 
330
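The function accepts raw Gradio microphone data (an int16 array plus sample rate). A minimal round-trip sketch (input values are synthetic noise, for illustration only):

```python
# Sketch: 16-bit mic input at 44.1 kHz in, normalized/bandpassed 48 kHz out.
import numpy as np
raw = (np.random.randn(44100) * 3000).astype(np.int16)  # 1 second of noise
clean, out_sr = preprocess_audio(raw, 44100)
print(out_sr, len(clean))  # 48000, 48000 samples after resampling
```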
  # ================== LLM PROMPTS ==================
331
 
332
+ BIRD_EXPERT_SYSTEM = """You are an expert ornithologist with knowledge of 10,000+ bird species worldwide.
333
+ You specialize in Indian birds (1,300+ species).
334
 
335
+ Your task: Identify bird species from audio features, images, or descriptions.
 
 
 
 
 
336
 
337
+ IMPORTANT RULES:
338
+ 1. Identify ALL birds that could be present (multi-bird detection)
339
+ 2. Include any bird with confidence >= 50%
340
+ 3. Consider frequency, pattern, syllable rate, and context
341
+ 4. For India, consider common species first but don't ignore rare possibilities
342
 
343
+ You MUST respond in this EXACT JSON format:
344
  {
345
  "birds": [
346
  {
347
  "name": "Common Name",
348
  "scientific_name": "Genus species",
349
  "confidence": 85,
350
+ "reasoning": "Brief explanation of why this bird matches"
 
351
  }
352
  ],
353
+ "analysis": "Overall analysis of the recording/image/description"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
354
  }"""
355
 
 
 
 
 
 
 
 
 
 
356
 
+def get_bird_image(name: str) -> str:
+    """Get image URL for bird."""
+    if name in BIRD_IMAGES:
+        return BIRD_IMAGES[name]
+    name_lower = name.lower()
+    for bird, url in BIRD_IMAGES.items():
+        if bird.lower() in name_lower or name_lower in bird.lower():
+            return url
+    return DEFAULT_IMAGE
 
+def format_results(llm_response: str) -> str:
     """Parse LLM response and format with images."""
+    if not llm_response:
+        return "### ⚠️ No response from LLM"
+
     try:
+        # Extract JSON
+        start = llm_response.find('{')
+        end = llm_response.rfind('}') + 1
+        if start >= 0 and end > start:
+            data = json.loads(llm_response[start:end])
         else:
+            # Try to find birds mentioned in text
+            return f"### 🤖 AI Analysis\n\n{llm_response}"
 
         birds = data.get("birds", [])
         analysis = data.get("analysis", "")
 
         if not birds:
             return f"### ❌ No birds identified\n\n{analysis}"
 
+        output = f"## 🐦 Birds Identified\n\n*{analysis}*\n\n"
 
         for i, bird in enumerate(birds, 1):
            name = bird.get("name", "Unknown")
            scientific = bird.get("scientific_name", "")
+           conf = bird.get("confidence", 0)
+           reason = bird.get("reasoning", "")
 
+           img = get_bird_image(name)
 
+           if conf >= 80:
               badge = "🟢 HIGH"
+           elif conf >= 60:
               badge = "🟡 MEDIUM"
            else:
               badge = "🔴 LOW"
 
            output += f"""
 ---
 
+### {i}. **{name}** ({conf}%) {badge}
 
+![{name}]({img})
 
 **Scientific Name:** _{scientific}_
 
+**Why this bird:** {reason}
 
 """
 
        return output
 
+    except json.JSONDecodeError:
+        return f"### 🤖 AI Analysis\n\n{llm_response}"
 
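To see how the parser tolerates chatter around the JSON, a hypothetical model reply (invented for illustration; its shape follows the `BIRD_EXPERT_SYSTEM` contract):

```python
# Hypothetical LLM reply - shape follows BIRD_EXPERT_SYSTEM's JSON contract.
sample_reply = '''Sure, here is my identification:
{"birds": [{"name": "Asian Koel",
            "scientific_name": "Eudynamys scolopaceus",
            "confidence": 88,
            "reasoning": "Loud, rising ku-oo whistle in the low-medium band"}],
 "analysis": "Single melodic, repetitive caller"}'''
print(format_results(sample_reply))  # renders badge, image, and reasoning
```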
 # ================== IDENTIFICATION FUNCTIONS ==================
 
+def identify_audio(audio, location: str = "", month: str = ""):
+    """Identify bird from audio using LLM."""
     if audio is None:
+        return "### ⚠️ Please record or upload bird audio"
 
+    status = get_llm_status()
+    yield f"### 🔄 Processing audio...\n\n**LLM Status:** {status}"
 
     try:
         sr, audio_data = audio
         audio_data, sr = preprocess_audio(audio_data, sr)
 
+        yield f"### 🔄 Extracting features...\n\n**LLM Status:** {status}"
+        features = extract_features(audio_data, sr)
 
+        prompt = f"""Identify the bird(s) in this recording:
 
 {features.to_description()}
 """
         if location:
+            prompt += f"\nLocation: {location}"
         if month:
+            prompt += f"\nMonth: {month}"
 
+        prompt += "\n\nIdentify ALL birds that could be making these sounds (confidence >= 50%)."
 
+        yield f"### 🔄 Consulting AI ({status})...\n\n**Audio Features:**\n{features.to_description()}"
 
+        response = call_llm(prompt, BIRD_EXPERT_SYSTEM)
 
        if response:
+            result = format_results(response)
+            result += f"\n\n---\n\n### 📊 Audio Analysis\n{features.to_description()}"
+            result += f"\n\n**LLM:** {status}"
            yield result
        else:
+            yield f"""### ⚠️ LLM not responding
 
+**LLM Status:** {status}
 
 **Your audio features:**
+{features.to_description()}
+
+**To fix:**
+1. Make sure Ollama is running: `ollama serve`
+2. Pull the model: `ollama pull {OLLAMA_MODEL}`
+3. Try again
+"""
 
    except Exception as e:
+        yield f"### ❌ Error: {str(e)}"
 
+def identify_description(description: str):
+    """Identify bird from description using LLM."""
     if not description or len(description.strip()) < 5:
+        return "### ⚠️ Please enter a description (at least 5 characters)"
 
+    status = get_llm_status()
+    yield f"### 🔄 Analyzing description...\n\n**LLM Status:** {status}"
 
     prompt = f"""Identify the bird(s) based on this description:
 
 {description}
 
+Consider Indian birds especially. List all matching birds with confidence >= 50%."""
 
+    response = call_llm(prompt, BIRD_EXPERT_SYSTEM)
 
     if response:
+        result = format_results(response)
+        result += f"\n\n**LLM:** {status}"
+        yield result
     else:
+        yield f"""### ⚠️ LLM not responding
 
+**LLM Status:** {status}
 
+**To fix:**
+1. Make sure Ollama is running: `ollama serve`
+2. Pull the model: `ollama pull {OLLAMA_MODEL}`
+"""
 
+def identify_image(image):
+    """Identify bird from image using LLM."""
     if image is None:
+        return "### ⚠️ Please upload or capture a bird image"
 
+    status = get_llm_status()
+    yield f"### 🔄 Analyzing image...\n\n**LLM Status:** {status}"
 
     try:
         if hasattr(image, 'numpy'):
+            img = image.numpy()
         else:
+            img = np.array(image)
 
+        # Color analysis
         colors = []
+        if len(img.shape) == 3 and img.shape[2] >= 3:
+            r, g, b = np.mean(img[:,:,0]), np.mean(img[:,:,1]), np.mean(img[:,:,2])
+            if g > r * 1.1 and g > b * 1.1: colors.append("green")
+            if b > r * 1.1 and b > g: colors.append("blue")
+            if r > g * 1.2 and r > b * 1.2: colors.append("red/brown")
+            if r > 180 and g > 180 and b > 180: colors.append("white")
+            if r < 80 and g < 80 and b < 80: colors.append("black")
+            if r > 150 and g > 120 and b < 100: colors.append("yellow")
 
+        color_desc = ", ".join(colors) if colors else "mixed"
 
+        yield f"### 🔄 Detected colors: {color_desc}\n\n**LLM Status:** {status}"
 
+        prompt = f"""Identify the bird in this image.
+
 Detected dominant colors: {color_desc}
+Image size: {img.shape[1]}x{img.shape[0]} pixels
 
+Based on these colors, what Indian bird species could this be?
+List all matching birds with confidence >= 50%."""
 
+        response = call_llm(prompt, BIRD_EXPERT_SYSTEM)
 
        if response:
+            result = format_results(response)
+            result += f"\n\n**Detected colors:** {color_desc}"
+            result += f"\n\n**LLM:** {status}"
+            yield result
        else:
+            yield f"### ⚠️ LLM not responding\n\n**Detected colors:** {color_desc}"
 
     except Exception as e:
         yield f"### ❌ Error: {str(e)}"
 
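The color heuristic works on channel means only. A toy check (synthetic frame, not a photo):

```python
# Toy check of the mean-RGB heuristic used in identify_image.
import numpy as np
frame = np.zeros((64, 64, 3), dtype=np.uint8)
frame[:, :, 1] = 200  # saturate the green channel
r, g, b = (float(np.mean(frame[:, :, c])) for c in range(3))
print(g > r * 1.1 and g > b * 1.1)  # True -> "green" gets appended
```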
 # ================== GRADIO UI ==================
 
+with gr.Blocks(title="🐦 BirdSense Pro - Ollama LLM") as demo:
 
    gr.HTML("""
    <div style="text-align: center; background: linear-gradient(135deg, #1a4d2e 0%, #2d5a3e 50%, #1a4d2e 100%); padding: 2rem; border-radius: 16px; margin-bottom: 1.5rem;">
        <h1 style="color: #4ade80; font-size: 2.5rem; margin: 0;">🐦 BirdSense Pro</h1>
+       <p style="color: #94a3b8; font-size: 1.2rem;">Local LLM Bird Identification (Ollama)</p>
        <p style="color: #64748b; font-size: 0.9rem;">
+           🤖 Uses LOCAL Ollama LLM • 10,000+ species • Multi-bird detection
        </p>
    </div>
    """)
 
+    # LLM Status indicator
+    status_text = get_llm_status()
+    gr.Markdown(f"**Current LLM:** {status_text}")
+
    with gr.Tabs():
+        # AUDIO TAB
+        with gr.Tab("🎤 Audio"):
            gr.Markdown("""
+            ### Record or upload bird audio
+
+            The audio features are extracted and sent to the LLM (Ollama) which identifies ALL matching birds.
            """)
 
            with gr.Row():
                with gr.Column(scale=1):
+                    audio_in = gr.Audio(sources=["microphone", "upload"], type="numpy", label="🎤 Bird Audio")
                    with gr.Row():
+                        loc_in = gr.Textbox(label="📍 Location", placeholder="e.g., Western Ghats")
+                        month_in = gr.Dropdown(
+                            label="📅 Month",
+                            choices=["", "January", "February", "March", "April", "May",
+                                     "June", "July", "August", "September", "October",
+                                     "November", "December"]
                        )
+                    audio_btn = gr.Button("🔍 Identify with Ollama LLM", variant="primary", size="lg")
 
                with gr.Column(scale=2):
+                    audio_out = gr.Markdown()
 
+            audio_btn.click(identify_audio, [audio_in, loc_in, month_in], audio_out)
 
+        # DESCRIPTION TAB
+        with gr.Tab("📝 Description"):
            gr.Markdown("""
+            ### Describe the bird you saw or heard
+
+            The LLM will analyze your description and identify matching species.
            """)
 
            with gr.Row():
                with gr.Column(scale=1):
+                    desc_in = gr.Textbox(
+                        label="Bird Description",
+                        placeholder="Example: Small green bird with red forehead, making tuk-tuk-tuk sound like a hammer",
+                        lines=4
                    )
+                    desc_btn = gr.Button("🔍 Identify with Ollama LLM", variant="primary", size="lg")
 
                with gr.Column(scale=2):
+                    desc_out = gr.Markdown()
 
+            desc_btn.click(identify_description, [desc_in], desc_out)
 
+        # IMAGE TAB
+        with gr.Tab("📷 Image"):
            gr.Markdown("""
+            ### Upload or capture a bird image
+
+            Colors are extracted and sent to the LLM for identification.
            """)
 
            with gr.Row():
                with gr.Column(scale=1):
+                    img_in = gr.Image(sources=["upload", "webcam"], type="numpy", label="📷 Bird Image")
+                    img_btn = gr.Button("🔍 Identify with Ollama LLM", variant="primary", size="lg")
 
                with gr.Column(scale=2):
+                    img_out = gr.Markdown()
 
+            img_btn.click(identify_image, [img_in], img_out)
 
+        # SETUP TAB
+        with gr.Tab("⚙️ Setup"):
+            gr.Markdown(f"""
+            ## Ollama Setup
+
+            BirdSense Pro uses **Ollama** for local LLM inference.
+
+            ### Current Status: {get_llm_status()}
+
+            ### Setup Instructions:
+
+            1. **Install Ollama:**
+            ```bash
+            # macOS
+            brew install ollama
+
+            # Or download from https://ollama.ai
+            ```
+
+            2. **Start Ollama:**
+            ```bash
+            ollama serve
+            ```
+
+            3. **Pull the model:**
+            ```bash
+            ollama pull {OLLAMA_MODEL}
+            ```
+
+            4. **Refresh this page and try again!**
+
+            ### Model Used: `{OLLAMA_MODEL}`
+
+            This is a fast, efficient model good for bird identification.
+            For better accuracy, you can also try:
+            - `llama3.2:3b`
+            - `mistral:7b`
+            - `qwen2.5:7b`
+
+            Change the model in the code: `OLLAMA_MODEL = "your-model"`
            """)
 
    gr.HTML("""
    <div style="text-align: center; padding: 1rem; margin-top: 1rem; border-top: 1px solid #334155;">
        <p style="color: #4ade80; font-weight: bold;">🐦 BirdSense Pro - CSCR Initiative</p>
        <p style="color: #64748b;">
+           Powered by LOCAL Ollama LLM • <a href="https://github.com/sohamzycus/eagv2/tree/master/birdsense" style="color: #4ade80;">GitHub</a>
        </p>
    </div>
    """)
 
+
 if __name__ == "__main__":
+    print(f"\n🐦 BirdSense Pro")
+    print(f"LLM Status: {get_llm_status()}")
+    print(f"\nStarting server...")
    demo.launch(server_name="0.0.0.0", server_port=7860)