Spaces:

sohiyiy
/

birdsense-pro

Running

App Files Files Community

sohiyiy committed 7 days ago

Commit

de8eebd

verified ·

1 Parent(s): 2ee10d3

Upload folder using huggingface_hub

Browse files

Files changed (2) hide show

ollama_client.py +254 -0
zero_shot_identifier.py +470 -0

ollama_client.py ADDED Viewed

	@@ -0,0 +1,254 @@

+"""
+Ollama Client for BirdSense.
+Provides interface to local LLM models via Ollama for:
+- Species reasoning and verification
+- Description matching
+- Natural language queries about birds
+"""
+import httpx
+import json
+from typing import Optional, Dict, Any, List, AsyncGenerator
+from dataclasses import dataclass
+import asyncio
+@dataclass
+class OllamaConfig:
+    """Configuration for Ollama client."""
+    # Root URL of the Ollama server; used as the httpx client's base_url.
+    base_url: str = "http://localhost:11434"
+    # Default model tag, used whenever a call does not pass its own `model`.
+    model: str = "phi3:mini"  # Lightweight model for edge deployment
+    # Default sampling temperature, sent as the "temperature" option.
+    temperature: float = 0.3
+    # Default generation length cap, sent as the "num_predict" option.
+    max_tokens: int = 512
+    # Request timeout handed to httpx.Timeout (httpx interprets it in seconds).
+    timeout: int = 30
+    # NOTE(review): not read by the client methods in this file; generate()/chat()
+    # always send stream=False and generate_stream() always sends stream=True.
+    stream: bool = False
+class OllamaClient:
+    """
+    Async client for Ollama API.
+    Supports:
+    - Text generation
+    - Streaming responses
+    - Model listing and management
+    """
+    def __init__(self, config: Optional[OllamaConfig] = None):
+        self.config = config or OllamaConfig()
+        self._client: Optional[httpx.AsyncClient] = None
+    async def __aenter__(self):
+        self._client = httpx.AsyncClient(
+            base_url=self.config.base_url,
+            timeout=httpx.Timeout(self.config.timeout)
+        )
+        return self
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        if self._client:
+            await self._client.aclose()
+    @property
+    def client(self) -> httpx.AsyncClient:
+        if self._client is None:
+            self._client = httpx.AsyncClient(
+                base_url=self.config.base_url,
+                timeout=httpx.Timeout(self.config.timeout)
+            )
+        return self._client
+    async def generate(
+        self,
+        prompt: str,
+        system_prompt: Optional[str] = None,
+        temperature: Optional[float] = None,
+        max_tokens: Optional[int] = None,
+        model: Optional[str] = None
+    ) -> str:
+        """
+        Generate text completion.
+        Args:
+            prompt: User prompt
+            system_prompt: System instruction
+            temperature: Sampling temperature (default from config)
+            max_tokens: Max tokens to generate
+            model: Model to use (default from config)
+        Returns:
+            Generated text response
+        """
+        payload = {
+            "model": model or self.config.model,
+            "prompt": prompt,
+            "stream": False,
+            "options": {
+                "temperature": temperature or self.config.temperature,
+                "num_predict": max_tokens or self.config.max_tokens
+            }
+        }
+        if system_prompt:
+            payload["system"] = system_prompt
+        try:
+            response = await self.client.post("/api/generate", json=payload)
+            response.raise_for_status()
+            result = response.json()
+            return result.get("response", "")
+        except httpx.HTTPError as e:
+            raise ConnectionError(f"Failed to connect to Ollama: {e}")
+    async def generate_stream(
+        self,
+        prompt: str,
+        system_prompt: Optional[str] = None,
+        model: Optional[str] = None
+    ) -> AsyncGenerator[str, None]:
+        """
+        Stream text generation.
+        Yields:
+            Chunks of generated text
+        """
+        payload = {
+            "model": model or self.config.model,
+            "prompt": prompt,
+            "stream": True,
+            "options": {
+                "temperature": self.config.temperature,
+                "num_predict": self.config.max_tokens
+            }
+        }
+        if system_prompt:
+            payload["system"] = system_prompt
+        async with self.client.stream("POST", "/api/generate", json=payload) as response:
+            async for line in response.aiter_lines():
+                if line:
+                    data = json.loads(line)
+                    if "response" in data:
+                        yield data["response"]
+                    if data.get("done", False):
+                        break
+    async def chat(
+        self,
+        messages: List[Dict[str, str]],
+        model: Optional[str] = None
+    ) -> str:
+        """
+        Chat completion with message history.
+        Args:
+            messages: List of {"role": "user/assistant/system", "content": "..."}
+            model: Model to use
+        Returns:
+            Assistant response
+        """
+        payload = {
+            "model": model or self.config.model,
+            "messages": messages,
+            "stream": False,
+            "options": {
+                "temperature": self.config.temperature,
+                "num_predict": self.config.max_tokens
+            }
+        }
+        try:
+            response = await self.client.post("/api/chat", json=payload)
+            response.raise_for_status()
+            result = response.json()
+            return result.get("message", {}).get("content", "")
+        except httpx.HTTPError as e:
+            raise ConnectionError(f"Failed to connect to Ollama: {e}")
+    async def list_models(self) -> List[Dict[str, Any]]:
+        """List available models."""
+        try:
+            response = await self.client.get("/api/tags")
+            response.raise_for_status()
+            return response.json().get("models", [])
+        except httpx.HTTPError as e:
+            raise ConnectionError(f"Failed to list models: {e}")
+    async def is_model_available(self, model: Optional[str] = None) -> bool:
+        """Check if specified model is available."""
+        model = model or self.config.model
+        try:
+            models = await self.list_models()
+            return any(m.get("name", "").startswith(model.split(":")[0]) for m in models)
+        except Exception:
+            return False
+    async def health_check(self) -> bool:
+        """Check if Ollama server is running."""
+        try:
+            response = await self.client.get("/api/tags")
+            return response.status_code == 200
+        except Exception:
+            return False
+class SyncOllamaClient:
+    """
+    Synchronous wrapper for OllamaClient.
+    Convenience class for non-async code paths.
+    """
+    def __init__(self, config: Optional[OllamaConfig] = None):
+        self.config = config or OllamaConfig()
+        self._async_client = OllamaClient(config)
+    def _run(self, coro):
+        """Run async coroutine synchronously."""
+        try:
+            loop = asyncio.get_event_loop()
+            if loop.is_running():
+                # If we're in an async context, use nest_asyncio pattern
+                import nest_asyncio
+                nest_asyncio.apply()
+                return loop.run_until_complete(coro)
+            else:
+                return loop.run_until_complete(coro)
+        except RuntimeError:
+            # No event loop exists
+            return asyncio.run(coro)
+    def generate(
+        self,
+        prompt: str,
+        system_prompt: Optional[str] = None,
+        temperature: Optional[float] = None,
+        max_tokens: Optional[int] = None,
+        model: Optional[str] = None
+    ) -> str:
+        """Generate text completion synchronously."""
+        return self._run(
+            self._async_client.generate(
+                prompt, system_prompt, temperature, max_tokens, model
+            )
+        )
+    def chat(
+        self,
+        messages: List[Dict[str, str]],
+        model: Optional[str] = None
+    ) -> str:
+        """Chat completion synchronously."""
+        return self._run(self._async_client.chat(messages, model))
+    def health_check(self) -> bool:
+        """Check Ollama health synchronously."""
+        return self._run(self._async_client.health_check())
+    def is_model_available(self, model: Optional[str] = None) -> bool:
+        """Check model availability synchronously."""
+        return self._run(self._async_client.is_model_available(model))

zero_shot_identifier.py ADDED Viewed

	@@ -0,0 +1,470 @@

+"""
+Zero-Shot Bird Identification using LLM.
+This is the CORE innovation: Instead of training on every bird,
+we use the LLM's knowledge to identify ANY bird from audio features.
+The LLM has learned about thousands of bird species from its training data,
+including their calls, habitats, and behaviors.
+"""
+import json
+import logging
+from dataclasses import dataclass
+from typing import List, Dict, Any, Optional, Tuple
+import numpy as np
+from .ollama_client import OllamaClient, OllamaConfig
+logger = logging.getLogger(__name__)
+@dataclass
+class AudioFeatures:
+    """Extracted audio features for LLM analysis."""
+    # Clip length in seconds (sample count / sample rate).
+    duration: float
+    # Frequency of the strongest bin of the Welch power spectrum, in Hz.
+    dominant_frequency_hz: float
+    # (low, high) frequencies at the 5% and 95% points of cumulative spectral power, in Hz.
+    frequency_range: Tuple[float, float]
+    # Power-weighted mean frequency, in Hz.
+    spectral_centroid: float
+    # Power-weighted standard deviation of frequency around the centroid, in Hz.
+    spectral_bandwidth: float
+    # syllable_rate * 60 when at least two syllables were found, otherwise 0.
+    tempo_bpm: float
+    # Number of peaks detected in the smoothed amplitude envelope.
+    num_syllables: int
+    syllable_rate: float  # syllables per second
+    # True when the dominant frequency varies noticeably across the clip.
+    is_melodic: bool
+    # True when the syllable count/rate suggests a regularly repeated call.
+    is_repetitive: bool
+    amplitude_pattern: str  # "constant", "rising", "falling", "varied"
+    # Rough SNR estimate from the 95th vs 10th percentile of |audio|, in dB.
+    estimated_snr_db: float
+    # SNR mapped linearly onto 0.0-1.0 (5 dB -> 0, 30 dB -> 1), clamped.
+    quality_score: float
+@dataclass
+class ZeroShotResult:
+    """Result from zero-shot identification."""
+    # Common name of the identified species ("Unknown" if the LLM omitted it).
+    species_name: str
+    # Binomial name; empty string if the LLM omitted it.
+    scientific_name: str
+    confidence: float  # 0.0 to 1.0
+    confidence_label: str  # "high", "medium", "low"
+    # Free-text explanation of the identification.
+    reasoning: str
+    # Feature names the identification was based on.
+    key_features_matched: List[str]
+    # Other candidates; the prompt asks for dicts with "name", "scientific", "confidence".
+    alternative_species: List[Dict[str, Any]]
+    # Whether the species is reported as an Indian bird.
+    is_indian_bird: bool
+    # Whether the result was flagged as an unusual sighting.
+    is_unusual_sighting: bool
+    # Explanation for the unusual flag, or None.
+    unusual_reason: Optional[str]
+    # Description of what the species typically sounds like; may be empty.
+    call_description: str
+class ZeroShotBirdIdentifier:
+    """
+    Zero-shot bird identification using LLM.
+    This approach:
+    1. Extracts audio features (frequency, pattern, duration)
+    2. Sends features to LLM with expert prompt
+    3. LLM identifies bird from its knowledge base
+    4. Returns species with confidence and reasoning
+    Benefits:
+    - No training required
+    - Can identify ANY of 10,000+ bird species
+    - Works for non-Indian birds too (with novelty flag)
+    - Explainable results
+    """
+    def __init__(self, ollama_config: Optional[OllamaConfig] = None):
+        self.ollama = OllamaClient(ollama_config or OllamaConfig(model="qwen2.5:3b"))
+        self.is_ready = False
+    def initialize(self) -> bool:
+        """Check if LLM is available."""
+        try:
+            import asyncio
+            async def _check():
+                return await self.ollama.health_check()
+            try:
+                loop = asyncio.get_event_loop()
+                if loop.is_running():
+                    import nest_asyncio
+                    nest_asyncio.apply()
+                self.is_ready = loop.run_until_complete(_check())
+            except RuntimeError:
+                self.is_ready = asyncio.run(_check())
+            return self.is_ready
+        except Exception as e:
+            logger.warning(f"Failed to initialize LLM: {e}")
+            return False
+    def extract_features(
+        self,
+        audio: np.ndarray,
+        sample_rate: int = 32000,
+        mel_spec: Optional[np.ndarray] = None
+    ) -> AudioFeatures:
+        """Extract audio features for LLM analysis."""
+        import scipy.signal as signal
+        duration = len(audio) / sample_rate
+        # Frequency analysis
+        freqs, psd = signal.welch(audio, sample_rate, nperseg=2048)
+        # Dominant frequency
+        dominant_idx = np.argmax(psd)
+        dominant_freq = freqs[dominant_idx]
+        # Frequency range (where 90% of energy is)
+        cumsum = np.cumsum(psd) / np.sum(psd)
+        freq_low = freqs[np.searchsorted(cumsum, 0.05)]
+        freq_high = freqs[np.searchsorted(cumsum, 0.95)]
+        # Spectral centroid
+        spectral_centroid = np.sum(freqs * psd) / (np.sum(psd) + 1e-10)
+        # Spectral bandwidth
+        spectral_bandwidth = np.sqrt(np.sum(((freqs - spectral_centroid) ** 2) * psd) / (np.sum(psd) + 1e-10))
+        # Amplitude envelope analysis
+        envelope = np.abs(signal.hilbert(audio))
+        envelope_smooth = signal.medfilt(envelope, 1001)
+        # Detect syllables (peaks in envelope)
+        peaks, _ = signal.find_peaks(envelope_smooth, height=0.1 * np.max(envelope_smooth), distance=sample_rate // 10)
+        num_syllables = len(peaks)
+        syllable_rate = num_syllables / duration if duration > 0 else 0
+        # Amplitude pattern
+        if len(envelope_smooth) > 100:
+            start_amp = np.mean(envelope_smooth[:len(envelope_smooth)//4])
+            end_amp = np.mean(envelope_smooth[-len(envelope_smooth)//4:])
+            amp_var = np.std(envelope_smooth) / (np.mean(envelope_smooth) + 1e-10)
+            if amp_var > 0.5:
+                amp_pattern = "varied"
+            elif end_amp > start_amp * 1.3:
+                amp_pattern = "rising"
+            elif end_amp < start_amp * 0.7:
+                amp_pattern = "falling"
+            else:
+                amp_pattern = "constant"
+        else:
+            amp_pattern = "constant"
+        # Melodic detection (frequency variation)
+        if len(audio) > sample_rate:
+            chunks = np.array_split(audio, 10)
+            chunk_freqs = []
+            for chunk in chunks:
+                if len(chunk) > 512:
+                    f, p = signal.welch(chunk, sample_rate, nperseg=512)
+                    chunk_freqs.append(f[np.argmax(p)])
+            freq_variation = np.std(chunk_freqs) / (np.mean(chunk_freqs) + 1e-10)
+            is_melodic = freq_variation > 0.1
+        else:
+            is_melodic = False
+        # Repetitiveness detection
+        if num_syllables >= 3:
+            if syllable_rate > 1.5 and syllable_rate < 10:  # Regular pattern
+                is_repetitive = True
+            else:
+                is_repetitive = False
+        else:
+            is_repetitive = num_syllables >= 2
+        # SNR estimation
+        noise_floor = np.percentile(np.abs(audio), 10)
+        signal_peak = np.percentile(np.abs(audio), 95)
+        snr_db = 20 * np.log10((signal_peak + 1e-10) / (noise_floor + 1e-10))
+        # Quality score
+        quality_score = min(1.0, max(0.0, (snr_db - 5) / 25))
+        # Tempo (for rhythmic calls)
+        if num_syllables >= 2:
+            tempo_bpm = syllable_rate * 60
+        else:
+            tempo_bpm = 0
+        return AudioFeatures(
+            duration=duration,
+            dominant_frequency_hz=float(dominant_freq),
+            frequency_range=(float(freq_low), float(freq_high)),
+            spectral_centroid=float(spectral_centroid),
+            spectral_bandwidth=float(spectral_bandwidth),
+            tempo_bpm=float(tempo_bpm),
+            num_syllables=num_syllables,
+            syllable_rate=float(syllable_rate),
+            is_melodic=is_melodic,
+            is_repetitive=is_repetitive,
+            amplitude_pattern=amp_pattern,
+            estimated_snr_db=float(snr_db),
+            quality_score=float(quality_score)
+        )
+    def identify(
+        self,
+        features: AudioFeatures,
+        location: Optional[str] = None,
+        month: Optional[int] = None,
+        user_description: Optional[str] = None
+    ) -> ZeroShotResult:
+        """
+        Identify bird species using zero-shot LLM inference.
+        This is the NOVEL approach - using LLM's knowledge to identify
+        any bird without needing to train on that specific species.
+        """
+        # Build expert prompt
+        prompt = self._build_identification_prompt(features, location, month, user_description)
+        # Call LLM (synchronously using asyncio)
+        try:
+            import asyncio
+            async def _generate():
+                return await self.ollama.generate(
+                    prompt,
+                    system_prompt=self._get_expert_system_prompt(),
+                    temperature=0.3,  # Lower for more deterministic
+                    max_tokens=1000
+                )
+            # Run async in sync context
+            try:
+                loop = asyncio.get_event_loop()
+                if loop.is_running():
+                    # Use nest_asyncio for nested event loops
+                    import nest_asyncio
+                    nest_asyncio.apply()
+                response = loop.run_until_complete(_generate())
+            except RuntimeError:
+                # No event loop running
+                response = asyncio.run(_generate())
+            # Parse response
+            return self._parse_identification_response(response, features)
+        except Exception as e:
+            logger.error(f"LLM identification failed: {e}")
+            return self._fallback_result(features)
+    def _get_expert_system_prompt(self) -> str:
+        """Expert ornithologist system prompt."""
+        return """You are an expert ornithologist with deep knowledge of bird vocalizations worldwide.
+You can identify birds by their calls based on frequency, pattern, duration, and context.
+Your expertise includes:
+- 10,000+ bird species globally
+- Detailed knowledge of Indian birds (1,300+ species)
+- Ability to distinguish similar-sounding species
+- Understanding of seasonal and geographic variations
+When identifying birds:
+1. Consider the audio characteristics carefully
+2. Match against known bird call patterns
+3. Account for regional variations
+4. Flag unusual or rare sightings
+5. Provide confidence based on how well features match
+Always respond in the exact JSON format requested."""
+    def _build_identification_prompt(
+        self,
+        features: AudioFeatures,
+        location: Optional[str],
+        month: Optional[int],
+        user_description: Optional[str]
+    ) -> str:
+        """Build identification prompt from audio features."""
+        # Describe frequency in bird call terms
+        freq_desc = self._describe_frequency(features.dominant_frequency_hz)
+        # Season
+        season = self._get_season(month) if month else "unknown"
+        prompt = f"""Identify this bird based on its call characteristics:
+## Audio Features
+- **Duration**: {features.duration:.1f} seconds
+- **Dominant Frequency**: {features.dominant_frequency_hz:.0f} Hz ({freq_desc})
+- **Frequency Range**: {features.frequency_range[0]:.0f} - {features.frequency_range[1]:.0f} Hz
+- **Call Pattern**: {"Melodic/varied" if features.is_melodic else "Monotone"}, {"Repetitive" if features.is_repetitive else "Non-repetitive"}
+- **Syllables**: {features.num_syllables} syllables at {features.syllable_rate:.1f}/second
+- **Rhythm**: {features.tempo_bpm:.0f} BPM (beats per minute)
+- **Amplitude**: {features.amplitude_pattern} pattern
+## Context
+- **Location**: {location or "India (unspecified)"}
+- **Season**: {season}
+- **Recording Quality**: {self._quality_label(features.quality_score)} (SNR: {features.estimated_snr_db:.0f}dB)
+"""
+        if user_description:
+            prompt += f"- **Observer Notes**: {user_description}\n"
+        prompt += """
+## Task
+Based on these audio features, identify the most likely bird species.
+Respond in this exact JSON format:
+{
+    "species_name": "Common Name",
+    "scientific_name": "Genus species",
+    "confidence": 0.85,
+    "reasoning": "Detailed explanation of why this species matches...",
+    "key_features_matched": ["feature1", "feature2"],
+    "alternatives": [
+        {"name": "Alternative 1", "scientific": "Genus species", "confidence": 0.1},
+        {"name": "Alternative 2", "scientific": "Genus species", "confidence": 0.05}
+    ],
+    "is_indian_bird": true,
+    "is_unusual": false,
+    "unusual_reason": null,
+    "typical_call": "Description of what this bird typically sounds like"
+}"""
+        return prompt
+    def _describe_frequency(self, freq: float) -> str:
+        """Describe frequency in bird call terms."""
+        if freq < 500:
+            return "very low (large bird or booming call)"
+        elif freq < 1000:
+            return "low (owl, dove, or large bird)"
+        elif freq < 2000:
+            return "low-medium (cuckoo, crow, or medium bird)"
+        elif freq < 4000:
+            return "medium (most songbirds)"
+        elif freq < 6000:
+            return "medium-high (warbler, sunbird)"
+        elif freq < 8000:
+            return "high (small passerine)"
+        else:
+            return "very high (insect-like or whistle)"
+    def _get_season(self, month: int) -> str:
+        """Get Indian season from month."""
+        if month in [12, 1, 2]:
+            return "winter (Dec-Feb) - winter migrants present"
+        elif month in [3, 4, 5]:
+            return "summer/pre-monsoon (Mar-May) - breeding season"
+        elif month in [6, 7, 8, 9]:
+            return "monsoon (Jun-Sep)"
+        else:
+            return "post-monsoon (Oct-Nov) - migration period"
+    def _quality_label(self, score: float) -> str:
+        """Convert quality score to label."""
+        if score > 0.8:
+            return "excellent"
+        elif score > 0.6:
+            return "good"
+        elif score > 0.4:
+            return "fair"
+        else:
+            return "poor"
+    def _parse_identification_response(
+        self,
+        response: str,
+        features: AudioFeatures
+    ) -> ZeroShotResult:
+        """Parse LLM response into structured result."""
+        try:
+            # Try to extract JSON from response
+            json_start = response.find('{')
+            json_end = response.rfind('}') + 1
+            if json_start >= 0 and json_end > json_start:
+                json_str = response[json_start:json_end]
+                data = json.loads(json_str)
+                confidence = float(data.get('confidence', 0.5))
+                return ZeroShotResult(
+                    species_name=data.get('species_name', 'Unknown'),
+                    scientific_name=data.get('scientific_name', ''),
+                    confidence=confidence,
+                    confidence_label=self._confidence_label(confidence),
+                    reasoning=data.get('reasoning', ''),
+                    key_features_matched=data.get('key_features_matched', []),
+                    alternative_species=data.get('alternatives', []),
+                    is_indian_bird=data.get('is_indian_bird', True),
+                    is_unusual_sighting=data.get('is_unusual', False),
+                    unusual_reason=data.get('unusual_reason'),
+                    call_description=data.get('typical_call', '')
+                )
+        except json.JSONDecodeError as e:
+            logger.warning(f"Failed to parse LLM JSON: {e}")
+        # Fallback: try to extract species name from text
+        return self._fallback_result(features, response)
+    def _confidence_label(self, confidence: float) -> str:
+        """Convert confidence to label."""
+        if confidence >= 0.8:
+            return "high"
+        elif confidence >= 0.6:
+            return "medium"
+        else:
+            return "low"
+    def _fallback_result(
+        self,
+        features: AudioFeatures,
+        llm_response: str = ""
+    ) -> ZeroShotResult:
+        """Generate fallback result when LLM parsing fails."""
+        # Try to guess based on frequency
+        if features.dominant_frequency_hz < 1000:
+            if features.is_repetitive:
+                species = "Spotted Owlet"
+                scientific = "Athene brama"
+            else:
+                species = "Indian Cuckoo"
+                scientific = "Cuculus micropterus"
+        elif features.dominant_frequency_hz < 3000:
+            if features.is_melodic:
+                species = "Oriental Magpie-Robin"
+                scientific = "Copsychus saularis"
+            else:
+                species = "Asian Koel"
+                scientific = "Eudynamys scolopaceus"
+        else:
+            if features.syllable_rate > 3:
+                species = "Coppersmith Barbet"
+                scientific = "Psilopogon haemacephalus"
+            else:
+                species = "Common Tailorbird"
+                scientific = "Orthotomus sutorius"
+        return ZeroShotResult(
+            species_name=species,
+            scientific_name=scientific,
+            confidence=0.4,
+            confidence_label="low",
+            reasoning="Identification based on audio frequency and pattern analysis. LLM analysis unavailable.",
+            key_features_matched=["frequency range", "call pattern"],
+            alternative_species=[],
+            is_indian_bird=True,
+            is_unusual_sighting=False,
+            unusual_reason=None,
+            call_description=""
+        )
+# Global instance for quick access
+_identifier: Optional[ZeroShotBirdIdentifier] = None
+def get_zero_shot_identifier() -> ZeroShotBirdIdentifier:
+    """Get or create global zero-shot identifier."""
+    global _identifier
+    if _identifier is None:
+        _identifier = ZeroShotBirdIdentifier()
+    return _identifier