from typing import Dict, List, Any
from PIL import Image
from io import BytesIO
import base64
import torch
import csv
from transformers import CLIPProcessor, CLIPModel
import os


class EndpointHandler():
    def __init__(self, path=""):
        self.model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
        self.processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
        self.model.eval()

        # Load categories from CSV
        self.categories = self.load_categories_from_csv(os.path.join(path, "categories.csv"))

    def load_categories_from_csv(self, filepath: str) -> List[str]:
        categories = []
        with open(filepath, newline='', encoding='utf-8') as csvfile:
            reader = csv.reader(csvfile)
            for row in reader:
                if row:
                    categories.append(row[0].strip())
        return categories

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Args:
            data: { "inputs": { "image": base64 string } }
        Returns:
            Top 50 categories with the highest similarity score.
        """
        inputs = data.get("inputs", data)

        # Decode the base64-encoded image and convert to RGB
        image = Image.open(BytesIO(base64.b64decode(inputs["image"]))).convert("RGB")

        # Process image and text through the CLIP processor
        processed = self.processor(text=self.categories, images=image, return_tensors="pt", padding=True)

        with torch.no_grad():
            image_features = self.model.get_image_features(pixel_values=processed["pixel_values"])
            text_features = self.model.get_text_features(
                input_ids=processed["input_ids"],
                attention_mask=processed["attention_mask"],
            )

        # L2-normalize features
        image_features = image_features / image_features.norm(p=2, dim=-1, keepdim=True)
        text_features = text_features / text_features.norm(p=2, dim=-1, keepdim=True)

        # Compute cosine similarity between the image and every category
        similarity = (image_features @ text_features.T).squeeze(0)

        # Prepare result sorted by descending score
        result = [{"label": label, "score": score.item()} for label, score in zip(self.categories, similarity)]
        result = sorted(result, key=lambda x: x["score"], reverse=True)

        return result[:50]  # Return top 50
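

# --- Local smoke test (illustrative sketch only, not part of the Inference Endpoints contract) ---
# This block shows how the handler above can be exercised locally with a base64-encoded image.
# "example.jpg" and the use of "." as the categories.csv location are placeholder assumptions
# for local testing; adjust them to your own files.
if __name__ == "__main__":
    handler = EndpointHandler(path=".")  # expects categories.csv in the current directory

    # Encode a local image the same way a client would before calling the endpoint
    with open("example.jpg", "rb") as f:
        encoded = base64.b64encode(f.read()).decode("utf-8")

    predictions = handler({"inputs": {"image": encoded}})
    for p in predictions[:5]:
        print(f"{p['label']}: {p['score']:.4f}")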