sohiyiy committed on
Commit
de8eebd
·
verified ·
1 Parent(s): 2ee10d3

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. ollama_client.py +254 -0
  2. zero_shot_identifier.py +470 -0
ollama_client.py ADDED
@@ -0,0 +1,254 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Ollama Client for BirdSense.
3
+
4
+ Provides interface to local LLM models via Ollama for:
5
+ - Species reasoning and verification
6
+ - Description matching
7
+ - Natural language queries about birds
8
+ """
9
+
10
+ import httpx
11
+ import json
12
+ from typing import Optional, Dict, Any, List, AsyncGenerator
13
+ from dataclasses import dataclass
14
+ import asyncio
15
+
16
+
17
@dataclass
class OllamaConfig:
    """Connection and sampling settings shared by the Ollama clients.

    Every field has a default, so ``OllamaConfig()`` describes a server on
    ``localhost`` using a small model.
    """

    # Root URL of the Ollama HTTP server.
    base_url: str = "http://localhost:11434"
    # Default model tag; a lightweight model chosen for edge deployment.
    model: str = "phi3:mini"
    # Default sampling temperature sent in the request options.
    temperature: float = 0.3
    # Default generation length, sent to the server as ``num_predict``.
    max_tokens: int = 512
    # Value handed to ``httpx.Timeout`` when the HTTP client is built.
    timeout: int = 30
    # Streaming preference flag.
    stream: bool = False
26
+
27
+
28
class OllamaClient:
    """
    Async client for the Ollama HTTP API.

    Supports:
    - Text generation (``generate``) and streamed generation (``generate_stream``)
    - Chat completion with message history (``chat``)
    - Model listing, model availability and server health checks

    The underlying ``httpx.AsyncClient`` is created lazily on first use, or on
    entering ``async with``. Leaving the ``async with`` block (or awaiting
    ``aclose()``) closes it; the next request transparently builds a new one.
    """

    def __init__(self, config: Optional["OllamaConfig"] = None):
        self.config = config or OllamaConfig()
        self._client: Optional["httpx.AsyncClient"] = None

    def _build_http_client(self) -> "httpx.AsyncClient":
        """Create an HTTP client from the current configuration."""
        return httpx.AsyncClient(
            base_url=self.config.base_url,
            timeout=httpx.Timeout(self.config.timeout),
        )

    async def __aenter__(self):
        # Reuse a lazily created client instead of overwriting (and leaking) it.
        if self._client is None:
            self._client = self._build_http_client()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        await self.aclose()

    async def aclose(self) -> None:
        """Close the HTTP client, if any, and forget it.

        Clearing the reference matters: otherwise ``client`` would keep
        returning an already-closed ``httpx.AsyncClient``.
        """
        if self._client is not None:
            http_client, self._client = self._client, None
            await http_client.aclose()

    @property
    def client(self) -> "httpx.AsyncClient":
        """The HTTP client, created on first access."""
        if self._client is None:
            self._client = self._build_http_client()
        return self._client

    def _sampling_options(
        self,
        temperature: Optional[float] = None,
        max_tokens: Optional[int] = None,
    ) -> Dict[str, Any]:
        """Build the ``options`` payload, falling back to config for ``None`` only.

        An explicit ``0`` / ``0.0`` is a legitimate request (e.g. greedy
        decoding) and must not be replaced by the configured default.
        """
        return {
            "temperature": self.config.temperature if temperature is None else temperature,
            "num_predict": self.config.max_tokens if max_tokens is None else max_tokens,
        }

    async def _post_json(self, path: str, payload: Dict[str, Any]) -> Dict[str, Any]:
        """POST ``payload`` to ``path`` and return the decoded JSON body.

        Raises:
            ConnectionError: on any ``httpx.HTTPError`` (transport failure or
                non-2xx status), chained to the original exception.
        """
        try:
            response = await self.client.post(path, json=payload)
            response.raise_for_status()
            return response.json()
        except httpx.HTTPError as e:
            raise ConnectionError(f"Failed to connect to Ollama: {e}") from e

    async def generate(
        self,
        prompt: str,
        system_prompt: Optional[str] = None,
        temperature: Optional[float] = None,
        max_tokens: Optional[int] = None,
        model: Optional[str] = None
    ) -> str:
        """
        Generate a text completion.

        Args:
            prompt: User prompt
            system_prompt: System instruction (omitted from the request if empty)
            temperature: Sampling temperature (``None`` -> value from config)
            max_tokens: Max tokens to generate (``None`` -> value from config)
            model: Model to use (default from config)

        Returns:
            Generated text response ("" if the server reply has no "response" key)

        Raises:
            ConnectionError: if the request fails or returns an error status
        """
        payload: Dict[str, Any] = {
            "model": model or self.config.model,
            "prompt": prompt,
            "stream": False,
            "options": self._sampling_options(temperature, max_tokens),
        }
        if system_prompt:
            payload["system"] = system_prompt

        result = await self._post_json("/api/generate", payload)
        return result.get("response", "")

    async def generate_stream(
        self,
        prompt: str,
        system_prompt: Optional[str] = None,
        model: Optional[str] = None
    ) -> AsyncGenerator[str, None]:
        """
        Stream text generation.

        Yields:
            Chunks of generated text, in arrival order, until the server
            reports ``done``.

        Raises:
            httpx.HTTPError: on transport failure or a non-2xx status. Errors
                are left unwrapped here so existing callers that catch httpx
                exceptions around the stream keep working.
        """
        payload: Dict[str, Any] = {
            "model": model or self.config.model,
            "prompt": prompt,
            "stream": True,
            "options": self._sampling_options(),
        }
        if system_prompt:
            payload["system"] = system_prompt

        async with self.client.stream("POST", "/api/generate", json=payload) as response:
            # Without this, an error status would just yield nothing.
            response.raise_for_status()
            async for line in response.aiter_lines():
                if not line:
                    continue
                data = json.loads(line)
                if "response" in data:
                    yield data["response"]
                if data.get("done", False):
                    break

    async def chat(
        self,
        messages: List[Dict[str, str]],
        model: Optional[str] = None
    ) -> str:
        """
        Chat completion with message history.

        Args:
            messages: List of {"role": "user/assistant/system", "content": "..."}
            model: Model to use (default from config)

        Returns:
            Assistant response ("" if the reply carries no message content)

        Raises:
            ConnectionError: if the request fails or returns an error status
        """
        payload = {
            "model": model or self.config.model,
            "messages": messages,
            "stream": False,
            "options": self._sampling_options(),
        }
        result = await self._post_json("/api/chat", payload)
        return result.get("message", {}).get("content", "")

    async def list_models(self) -> List[Dict[str, Any]]:
        """List available models as reported by ``/api/tags``.

        Raises:
            ConnectionError: if the request fails or returns an error status
        """
        try:
            response = await self.client.get("/api/tags")
            response.raise_for_status()
            return response.json().get("models", [])
        except httpx.HTTPError as e:
            raise ConnectionError(f"Failed to list models: {e}") from e

    @staticmethod
    def _with_default_tag(name: str) -> str:
        """Return ``name`` with an explicit tag, assuming ``latest`` if absent.

        Only the last path segment is inspected so that a registry host with
        a port (``host:5000/model``) is not mistaken for a tag.
        """
        last_segment = name.rsplit("/", 1)[-1]
        return name if ":" in last_segment else f"{name}:latest"

    async def is_model_available(self, model: Optional[str] = None) -> bool:
        """Check if the specified model (default from config) is installed.

        Names are compared exactly, tag included, after giving untagged names
        the ``latest`` tag. A prefix match would report ``phi3:mini`` as
        available when only ``phi3.5:latest`` is installed.

        Returns:
            ``True`` if a matching model is listed; ``False`` otherwise,
            including when the model list cannot be retrieved.
        """
        wanted = self._with_default_tag(model or self.config.model)
        try:
            models = await self.list_models()
            return any(
                self._with_default_tag(m.get("name", "")) == wanted for m in models
            )
        except Exception:
            # Best-effort probe: any failure means "not available".
            return False

    async def health_check(self) -> bool:
        """Check if the Ollama server is running (``/api/tags`` answers 200)."""
        try:
            response = await self.client.get("/api/tags")
            return response.status_code == 200
        except Exception:
            # Best-effort probe: an unreachable server is simply unhealthy.
            return False
196
+
197
+
198
class SyncOllamaClient:
    """
    Synchronous wrapper for OllamaClient.

    Convenience class for non-async code paths. Outside of a running event
    loop, calls are executed on one private event loop that is reused across
    calls, so the wrapped client's cached HTTP connection stays usable. Call
    ``close()`` when done to release the connection and the loop.

    Instances are not designed for concurrent use from several threads.
    """

    def __init__(self, config: Optional["OllamaConfig"] = None):
        self.config = config or OllamaConfig()
        # Share the very same config object with the wrapped client so that
        # later changes to ``self.config`` are seen by the requests.
        self._async_client = OllamaClient(self.config)
        # Private loop used when no event loop is running (created on demand).
        self._loop: Optional[asyncio.AbstractEventLoop] = None
        # Loop on which the wrapped client's HTTP connection was last used.
        self._client_loop: Optional[asyncio.AbstractEventLoop] = None

    def _bind_client_to(self, loop) -> None:
        """Make sure the wrapped client does not reuse a foreign-loop connection.

        An async HTTP client is tied to the loop it was used on. When the
        target loop changes, the cached client is dropped so the wrapped
        client lazily builds a fresh one on the new loop.
        """
        if self._client_loop is not loop:
            self._async_client._client = None
            self._client_loop = loop

    def _run(self, coro):
        """Run async coroutine synchronously and return its result.

        Exceptions raised by the coroutine propagate unchanged; the coroutine
        is never executed twice.
        """
        try:
            running = asyncio.get_running_loop()
        except RuntimeError:
            running = None

        if running is not None:
            # If we're in an async context, use nest_asyncio pattern
            import nest_asyncio
            nest_asyncio.apply()
            self._bind_client_to(running)
            return running.run_until_complete(coro)

        if self._loop is None or self._loop.is_closed():
            self._loop = asyncio.new_event_loop()
        self._bind_client_to(self._loop)
        return self._loop.run_until_complete(coro)

    def close(self) -> None:
        """Release the HTTP connection and the private event loop.

        Safe to call more than once; the wrapper can still be used afterwards
        (a new loop and connection are created on demand).
        """
        loop = self._loop
        if loop is None or loop.is_closed():
            self._loop = None
            return
        http_client = getattr(self._async_client, "_client", None)
        if http_client is not None and self._client_loop is loop:
            loop.run_until_complete(http_client.aclose())
        self._async_client._client = None
        self._client_loop = None
        loop.close()
        self._loop = None

    def generate(
        self,
        prompt: str,
        system_prompt: Optional[str] = None,
        temperature: Optional[float] = None,
        max_tokens: Optional[int] = None,
        model: Optional[str] = None
    ) -> str:
        """Generate text completion synchronously."""
        return self._run(
            self._async_client.generate(
                prompt, system_prompt, temperature, max_tokens, model
            )
        )

    def chat(
        self,
        messages: List[Dict[str, str]],
        model: Optional[str] = None
    ) -> str:
        """Chat completion synchronously."""
        return self._run(self._async_client.chat(messages, model))

    def health_check(self) -> bool:
        """Check Ollama health synchronously."""
        return self._run(self._async_client.health_check())

    def is_model_available(self, model: Optional[str] = None) -> bool:
        """Check model availability synchronously."""
        return self._run(self._async_client.is_model_available(model))
254
+
zero_shot_identifier.py ADDED
@@ -0,0 +1,470 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Zero-Shot Bird Identification using LLM.
3
+
4
+ This is the CORE innovation: Instead of training on every bird,
5
+ we use the LLM's knowledge to identify ANY bird from audio features.
6
+
7
+ The LLM has learned about thousands of bird species from its training data,
8
+ including their calls, habitats, and behaviors.
9
+ """
10
+
11
+ import json
12
+ import logging
13
+ from dataclasses import dataclass
14
+ from typing import List, Dict, Any, Optional, Tuple
15
+ import numpy as np
16
+
17
+ from .ollama_client import OllamaClient, OllamaConfig
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
@dataclass
class AudioFeatures:
    """Summary of one audio clip, in the form handed to the LLM prompt."""

    # Clip length.
    duration: float
    # Frequency carrying the most spectral power.
    dominant_frequency_hz: float
    # (low, high) bounds of the band holding most of the energy.
    frequency_range: Tuple[float, float]
    # Power-weighted mean frequency.
    spectral_centroid: float
    # Power-weighted spread around the centroid.
    spectral_bandwidth: float
    # Call rhythm expressed in beats per minute.
    tempo_bpm: float
    # Number of detected syllables.
    num_syllables: int
    # Syllables per second.
    syllable_rate: float
    # Whether the pitch varies noticeably across the clip.
    is_melodic: bool
    # Whether the syllables form a repeating pattern.
    is_repetitive: bool
    # One of "constant", "rising", "falling", "varied".
    amplitude_pattern: str
    # Estimated signal-to-noise ratio in decibels.
    estimated_snr_db: float
    # Recording quality score.
    quality_score: float
38
+
39
+
40
@dataclass
class ZeroShotResult:
    """Outcome of one zero-shot identification attempt."""

    # Common name of the identified species.
    species_name: str
    # Binomial name of the identified species.
    scientific_name: str
    # Confidence in the identification, 0.0 to 1.0.
    confidence: float
    # Coarse confidence bucket: "high", "medium" or "low".
    confidence_label: str
    # Explanation of why the species was chosen.
    reasoning: str
    # Audio features that supported the identification.
    key_features_matched: List[str]
    # Other candidate species, one dict per candidate.
    alternative_species: List[Dict[str, Any]]
    # Whether the species is an Indian bird.
    is_indian_bird: bool
    # Whether the sighting was flagged as unusual.
    is_unusual_sighting: bool
    # Why the sighting is unusual, when it is.
    unusual_reason: Optional[str]
    # Description of the species' typical call.
    call_description: str
54
+
55
+
56
class ZeroShotBirdIdentifier:
    """
    Zero-shot bird identification using LLM.

    This approach:
    1. Extracts audio features (frequency, pattern, duration)
    2. Sends features to LLM with expert prompt
    3. LLM identifies bird from its knowledge base
    4. Returns species with confidence and reasoning

    Benefits:
    - No training required
    - Can identify ANY of 10,000+ bird species
    - Works for non-Indian birds too (with novelty flag)
    - Explainable results
    """

    # (exclusive upper bound in Hz, description); checked in order.
    _FREQUENCY_BANDS = (
        (500, "very low (large bird or booming call)"),
        (1000, "low (owl, dove, or large bird)"),
        (2000, "low-medium (cuckoo, crow, or medium bird)"),
        (4000, "medium (most songbirds)"),
        (6000, "medium-high (warbler, sunbird)"),
        (8000, "high (small passerine)"),
    )

    # Month number -> Indian season description.
    _SEASONS = {
        12: "winter (Dec-Feb) - winter migrants present",
        1: "winter (Dec-Feb) - winter migrants present",
        2: "winter (Dec-Feb) - winter migrants present",
        3: "summer/pre-monsoon (Mar-May) - breeding season",
        4: "summer/pre-monsoon (Mar-May) - breeding season",
        5: "summer/pre-monsoon (Mar-May) - breeding season",
        6: "monsoon (Jun-Sep)",
        7: "monsoon (Jun-Sep)",
        8: "monsoon (Jun-Sep)",
        9: "monsoon (Jun-Sep)",
        10: "post-monsoon (Oct-Nov) - migration period",
        11: "post-monsoon (Oct-Nov) - migration period",
    }

    def __init__(self, ollama_config: Optional["OllamaConfig"] = None):
        self.ollama = OllamaClient(ollama_config or OllamaConfig(model="qwen2.5:3b"))
        self.is_ready = False

    def _run_sync(self, call):
        """Run ``await call(client)`` to completion from synchronous code.

        ``call`` receives the Ollama client and returns an awaitable. The
        client is entered with ``async with`` for the duration of the call so
        its HTTP connection is opened and closed on the same event loop; a
        connection cached from an earlier, already-closed loop is never reused.

        Exceptions raised by the call propagate unchanged.
        """
        import asyncio

        async def _scoped():
            async with self.ollama as client:
                return await call(client)

        try:
            loop = asyncio.get_running_loop()
        except RuntimeError:
            # No event loop running in this thread
            return asyncio.run(_scoped())

        # Use nest_asyncio for nested event loops
        import nest_asyncio
        nest_asyncio.apply()
        return loop.run_until_complete(_scoped())

    def initialize(self) -> bool:
        """Check if LLM is available.

        Returns:
            ``True`` if the Ollama server answered the health check. The same
            value is stored in ``is_ready``; it is reset to ``False`` when the
            check fails or raises.
        """
        try:
            self.is_ready = bool(self._run_sync(lambda client: client.health_check()))
        except Exception as e:
            logger.warning(f"Failed to initialize LLM: {e}")
            self.is_ready = False
        return self.is_ready

    def extract_features(
        self,
        audio: np.ndarray,
        sample_rate: int = 32000,
        mel_spec: Optional[np.ndarray] = None
    ) -> "AudioFeatures":
        """Extract audio features for LLM analysis.

        Args:
            audio: Audio samples. NOTE(review): the computations treat this as
                a single-channel 1-D signal - confirm against callers.
            sample_rate: Sampling rate of ``audio`` in Hz.
            mel_spec: Accepted for interface compatibility; not used here.

        Returns:
            AudioFeatures with plain Python scalars.

        Raises:
            ValueError: if ``audio`` is empty or ``sample_rate`` is not positive.
        """
        import scipy.signal as signal

        audio = np.asarray(audio)
        if audio.size == 0:
            raise ValueError("audio must contain at least one sample")
        if sample_rate <= 0:
            raise ValueError("sample_rate must be positive")

        n_samples = len(audio)
        duration = n_samples / sample_rate

        # Frequency analysis (cap the segment length to avoid scipy's
        # "nperseg greater than input length" warning on short clips)
        freqs, psd = signal.welch(audio, sample_rate, nperseg=min(2048, n_samples))
        total_power = np.sum(psd)

        # Dominant frequency
        dominant_freq = freqs[np.argmax(psd)]

        # Frequency range (where 90% of energy is)
        if total_power > 0:
            energy_cdf = np.cumsum(psd) / total_power
            last_bin = len(freqs) - 1
            low_bin = min(int(np.searchsorted(energy_cdf, 0.05)), last_bin)
            high_bin = min(int(np.searchsorted(energy_cdf, 0.95)), last_bin)
        else:
            # Silent clip: no energy to locate, report the lowest bin.
            low_bin = high_bin = 0
        freq_low = freqs[low_bin]
        freq_high = freqs[high_bin]

        # Spectral centroid and bandwidth
        spectral_centroid = np.sum(freqs * psd) / (total_power + 1e-10)
        spectral_bandwidth = np.sqrt(
            np.sum(((freqs - spectral_centroid) ** 2) * psd) / (total_power + 1e-10)
        )

        # Amplitude envelope analysis; the median kernel must be odd and is
        # capped at the clip length so short clips are not mostly zero padding.
        envelope = np.abs(signal.hilbert(audio))
        longest_odd = n_samples if n_samples % 2 else n_samples - 1
        kernel = max(1, min(1001, longest_odd))
        envelope_smooth = signal.medfilt(envelope, kernel)

        # Detect syllables (peaks in envelope); find_peaks needs distance >= 1
        peaks, _ = signal.find_peaks(
            envelope_smooth,
            height=0.1 * np.max(envelope_smooth),
            distance=max(1, sample_rate // 10),
        )
        num_syllables = int(len(peaks))
        syllable_rate = num_syllables / duration

        # Amplitude pattern: compare the first and last quarter of the envelope
        amp_pattern = "constant"
        if len(envelope_smooth) > 100:
            start_amp = np.mean(envelope_smooth[:len(envelope_smooth) // 4])
            end_amp = np.mean(envelope_smooth[-len(envelope_smooth) // 4:])
            amp_var = np.std(envelope_smooth) / (np.mean(envelope_smooth) + 1e-10)

            if amp_var > 0.5:
                amp_pattern = "varied"
            elif end_amp > start_amp * 1.3:
                amp_pattern = "rising"
            elif end_amp < start_amp * 0.7:
                amp_pattern = "falling"

        # Melodic detection (variation of the peak frequency across 10 chunks)
        is_melodic = False
        if n_samples > sample_rate:
            chunk_freqs = []
            for chunk in np.array_split(audio, 10):
                if len(chunk) > 512:
                    f, p = signal.welch(chunk, sample_rate, nperseg=512)
                    chunk_freqs.append(f[np.argmax(p)])
            if chunk_freqs:
                freq_variation = np.std(chunk_freqs) / (np.mean(chunk_freqs) + 1e-10)
                # bool(): keep numpy scalars out of the result
                is_melodic = bool(freq_variation > 0.1)

        # Repetitiveness detection
        if num_syllables >= 3:
            is_repetitive = 1.5 < syllable_rate < 10  # Regular pattern
        else:
            is_repetitive = num_syllables >= 2

        # SNR estimation
        magnitude = np.abs(audio)
        noise_floor = np.percentile(magnitude, 10)
        signal_peak = np.percentile(magnitude, 95)
        snr_db = 20 * np.log10((signal_peak + 1e-10) / (noise_floor + 1e-10))

        # Quality score
        quality_score = min(1.0, max(0.0, (snr_db - 5) / 25))

        # Tempo (for rhythmic calls)
        tempo_bpm = syllable_rate * 60 if num_syllables >= 2 else 0

        return AudioFeatures(
            duration=float(duration),
            dominant_frequency_hz=float(dominant_freq),
            frequency_range=(float(freq_low), float(freq_high)),
            spectral_centroid=float(spectral_centroid),
            spectral_bandwidth=float(spectral_bandwidth),
            tempo_bpm=float(tempo_bpm),
            num_syllables=num_syllables,
            syllable_rate=float(syllable_rate),
            is_melodic=is_melodic,
            is_repetitive=bool(is_repetitive),
            amplitude_pattern=amp_pattern,
            estimated_snr_db=float(snr_db),
            quality_score=float(quality_score)
        )

    def identify(
        self,
        features: "AudioFeatures",
        location: Optional[str] = None,
        month: Optional[int] = None,
        user_description: Optional[str] = None
    ) -> "ZeroShotResult":
        """
        Identify bird species using zero-shot LLM inference.

        This is the NOVEL approach - using LLM's knowledge to identify
        any bird without needing to train on that specific species.

        Any failure while calling the LLM or parsing its answer is logged and
        answered with the frequency-based fallback result instead of raising.
        """
        # Build expert prompt
        prompt = self._build_identification_prompt(features, location, month, user_description)

        try:
            response = self._run_sync(
                lambda client: client.generate(
                    prompt,
                    system_prompt=self._get_expert_system_prompt(),
                    temperature=0.3,  # Lower for more deterministic
                    max_tokens=1000
                )
            )
            return self._parse_identification_response(response, features)
        except Exception as e:
            logger.error(f"LLM identification failed: {e}")
            return self._fallback_result(features)

    def _get_expert_system_prompt(self) -> str:
        """Expert ornithologist system prompt."""
        return """You are an expert ornithologist with deep knowledge of bird vocalizations worldwide.
You can identify birds by their calls based on frequency, pattern, duration, and context.

Your expertise includes:
- 10,000+ bird species globally
- Detailed knowledge of Indian birds (1,300+ species)
- Ability to distinguish similar-sounding species
- Understanding of seasonal and geographic variations

When identifying birds:
1. Consider the audio characteristics carefully
2. Match against known bird call patterns
3. Account for regional variations
4. Flag unusual or rare sightings
5. Provide confidence based on how well features match

Always respond in the exact JSON format requested."""

    def _build_identification_prompt(
        self,
        features: "AudioFeatures",
        location: Optional[str],
        month: Optional[int],
        user_description: Optional[str]
    ) -> str:
        """Build identification prompt from audio features.

        The prompt has three parts: the measured audio features, the
        recording context (location, season, quality, optional observer
        notes) and the task with the JSON answer format.
        """
        # Describe frequency in bird call terms
        freq_desc = self._describe_frequency(features.dominant_frequency_hz)

        # Season ("unknown" when no month, or an invalid one, is given)
        season = self._get_season(month) if month is not None else "unknown"

        melody = "Melodic/varied" if features.is_melodic else "Monotone"
        repetition = "Repetitive" if features.is_repetitive else "Non-repetitive"
        quality = self._quality_label(features.quality_score)
        place = location or "India (unspecified)"

        prompt = f"""Identify this bird based on its call characteristics:

## Audio Features
- **Duration**: {features.duration:.1f} seconds
- **Dominant Frequency**: {features.dominant_frequency_hz:.0f} Hz ({freq_desc})
- **Frequency Range**: {features.frequency_range[0]:.0f} - {features.frequency_range[1]:.0f} Hz
- **Call Pattern**: {melody}, {repetition}
- **Syllables**: {features.num_syllables} syllables at {features.syllable_rate:.1f}/second
- **Rhythm**: {features.tempo_bpm:.0f} BPM (beats per minute)
- **Amplitude**: {features.amplitude_pattern} pattern

## Context
- **Location**: {place}
- **Season**: {season}
- **Recording Quality**: {quality} (SNR: {features.estimated_snr_db:.0f}dB)
"""

        if user_description:
            prompt += f"- **Observer Notes**: {user_description}\n"

        prompt += """
## Task
Based on these audio features, identify the most likely bird species.

Respond in this exact JSON format:
{
"species_name": "Common Name",
"scientific_name": "Genus species",
"confidence": 0.85,
"reasoning": "Detailed explanation of why this species matches...",
"key_features_matched": ["feature1", "feature2"],
"alternatives": [
{"name": "Alternative 1", "scientific": "Genus species", "confidence": 0.1},
{"name": "Alternative 2", "scientific": "Genus species", "confidence": 0.05}
],
"is_indian_bird": true,
"is_unusual": false,
"unusual_reason": null,
"typical_call": "Description of what this bird typically sounds like"
}"""

        return prompt

    def _describe_frequency(self, freq: float) -> str:
        """Describe frequency in bird call terms."""
        for upper_bound, description in self._FREQUENCY_BANDS:
            if freq < upper_bound:
                return description
        return "very high (insect-like or whistle)"

    def _get_season(self, month: int) -> str:
        """Get Indian season from month (1-12); "unknown" for anything else."""
        return self._SEASONS.get(month, "unknown")

    def _quality_label(self, score: float) -> str:
        """Convert quality score to label."""
        if score > 0.8:
            return "excellent"
        elif score > 0.6:
            return "good"
        elif score > 0.4:
            return "fair"
        else:
            return "poor"

    @staticmethod
    def _extract_json_object(text: str) -> Optional[Dict[str, Any]]:
        """Return the JSON object starting at the first ``{`` in ``text``.

        Decoding stops at the end of that object, so explanatory text after
        it (even text containing braces) does not break parsing. Returns
        ``None`` when there is no ``{`` or the text there is not a valid
        JSON object.
        """
        if not isinstance(text, str):
            return None
        start = text.find('{')
        if start < 0:
            return None
        try:
            obj, _ = json.JSONDecoder().raw_decode(text, start)
        except json.JSONDecodeError as e:
            logger.warning(f"Failed to parse LLM JSON: {e}")
            return None
        return obj if isinstance(obj, dict) else None

    @staticmethod
    def _coerce_confidence(raw: Any) -> float:
        """Turn the LLM's confidence value into a float in [0.0, 1.0].

        Non-numeric values fall back to 0.5; values in (1, 100] are read as
        percentages; everything is finally clamped to the unit interval.
        """
        try:
            confidence = float(raw)
        except (TypeError, ValueError):
            return 0.5
        if 1.0 < confidence <= 100.0:
            confidence /= 100.0
        return min(1.0, max(0.0, confidence))

    def _parse_identification_response(
        self,
        response: str,
        features: "AudioFeatures"
    ) -> "ZeroShotResult":
        """Parse LLM response into structured result.

        Falls back to the frequency-based guess when the response contains
        no decodable JSON object.
        """
        data = self._extract_json_object(response)
        if data is None:
            return self._fallback_result(features, response)

        confidence = self._coerce_confidence(data.get('confidence', 0.5))
        matched = data.get('key_features_matched', [])
        alternatives = data.get('alternatives', [])

        return ZeroShotResult(
            species_name=data.get('species_name') or 'Unknown',
            scientific_name=data.get('scientific_name') or '',
            confidence=confidence,
            confidence_label=self._confidence_label(confidence),
            reasoning=data.get('reasoning') or '',
            # The model may answer null or a bare string; keep the list contract.
            key_features_matched=matched if isinstance(matched, list) else [],
            alternative_species=alternatives if isinstance(alternatives, list) else [],
            is_indian_bird=data.get('is_indian_bird', True),
            is_unusual_sighting=data.get('is_unusual', False),
            unusual_reason=data.get('unusual_reason'),
            call_description=data.get('typical_call') or ''
        )

    def _confidence_label(self, confidence: float) -> str:
        """Convert confidence to label."""
        if confidence >= 0.8:
            return "high"
        elif confidence >= 0.6:
            return "medium"
        else:
            return "low"

    def _fallback_result(
        self,
        features: "AudioFeatures",
        llm_response: str = ""
    ) -> "ZeroShotResult":
        """Generate fallback result when LLM parsing fails.

        The guess is driven by the dominant frequency band plus one pattern
        feature. ``llm_response`` is accepted for interface compatibility and
        is currently not inspected.
        """
        # Try to guess based on frequency
        if features.dominant_frequency_hz < 1000:
            if features.is_repetitive:
                species, scientific = "Spotted Owlet", "Athene brama"
            else:
                species, scientific = "Indian Cuckoo", "Cuculus micropterus"
        elif features.dominant_frequency_hz < 3000:
            if features.is_melodic:
                species, scientific = "Oriental Magpie-Robin", "Copsychus saularis"
            else:
                species, scientific = "Asian Koel", "Eudynamys scolopaceus"
        else:
            if features.syllable_rate > 3:
                species, scientific = "Coppersmith Barbet", "Psilopogon haemacephalus"
            else:
                species, scientific = "Common Tailorbird", "Orthotomus sutorius"

        return ZeroShotResult(
            species_name=species,
            scientific_name=scientific,
            confidence=0.4,
            confidence_label="low",
            reasoning="Identification based on audio frequency and pattern analysis. LLM analysis unavailable.",
            key_features_matched=["frequency range", "call pattern"],
            alternative_species=[],
            is_indian_bird=True,
            is_unusual_sighting=False,
            unusual_reason=None,
            call_description=""
        )
459
+
460
+
461
# Module-wide identifier instance, built lazily by get_zero_shot_identifier().
_identifier: Optional[ZeroShotBirdIdentifier] = None


def get_zero_shot_identifier() -> ZeroShotBirdIdentifier:
    """Return the shared zero-shot identifier, constructing it on first use."""
    global _identifier
    if _identifier is not None:
        return _identifier
    _identifier = ZeroShotBirdIdentifier()
    return _identifier
470
+