# tools/mcp_server_vectordb.py
"""MCP server exposing a semantic movie-plot search tool backed by Modal functions."""

import json
import logging
from datetime import date
from typing import Optional

import modal
from mcp.server.fastmcp import FastMCP

# Logging setup
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("mcp-vectordb")

# Create the MCP server
mcp = FastMCP("MovieVectorDB")

# Upper bound for `limit`, as advertised in the tool description.
_MAX_LIMIT = 20
# Overviews are cut to this many characters to keep the LLM payload small.
_OVERVIEW_MAX_CHARS = 150

# Modal function handles are looked up once, at import time. On failure both
# handles are set to None and the tool reports the service as unavailable.
try:
    encode_func = modal.Function.from_name("tmdb-project", "encode_user_query")
    search_func = modal.Function.from_name("tmdb-project", "search_similar_movies")
    logger.info("✅ Connected to Modal search functions")
except Exception as e:  # top-level boundary: the server must still start
    logger.error("❌ Failed to connect to Modal: %s", e)
    encode_func = None
    search_func = None


def _merge_filters(narrative_params, filters):
    """Return narrative params with the user-supplied JSON filters merged in.

    Filters are best-effort: a malformed string, a JSON value that is not an
    object, or non-dict narrative params leave ``narrative_params`` unchanged
    and produce a warning instead of failing the search.
    """
    if not filters:
        return narrative_params

    try:
        extra_filters = json.loads(filters)
    except (TypeError, ValueError) as exc:  # JSONDecodeError is a ValueError
        logger.warning("Ignoring invalid filters JSON: %s", exc)
        return narrative_params

    if not isinstance(extra_filters, dict):
        logger.warning(
            "Ignoring filters: expected a JSON object, got %s",
            type(extra_filters).__name__,
        )
        return narrative_params

    if not isinstance(narrative_params, dict):
        logger.warning("Ignoring filters: narrative features are not a dict")
        return narrative_params

    # Build a new dict so the encoder's result is not mutated in place.
    return {**narrative_params, **extra_filters}


def _extract_year(release_date) -> str:
    """Return the release year as a string, or "N/A" when the date is missing."""
    if not release_date:
        return "N/A"
    if isinstance(release_date, str):
        # Slicing already copes with strings shorter than four characters.
        return release_date[:4]
    if isinstance(release_date, date):
        return str(release_date.year)
    # Unknown type: fall back to the first four characters of its string form.
    return str(release_date)[:4]


def _shorten_overview(overview) -> str:
    """Cut the overview to the size budget, adding "..." only when text was cut."""
    text = overview or ""  # the key may be present with a None value
    if len(text) <= _OVERVIEW_MAX_CHARS:
        return text
    return text[:_OVERVIEW_MAX_CHARS] + "..."


def _simplify_candidate(candidate: dict) -> dict:
    """Reduce one search candidate to the compact record returned to the LLM."""
    movie = candidate.get("movie_data") or {}
    return {
        "id": movie.get("id"),
        "title": movie.get("title"),
        "relevance_score": candidate.get("final_score", 0.0),
        "year": _extract_year(movie.get("release_date")),
        "overview": _shorten_overview(movie.get("overview")),
    }


@mcp.tool()
def search_plots(query: str, filters: Optional[str] = None, limit: int = 5) -> str:
    """
    Semantic search for movies based on a plot description.

    Args:
        query: Detailed description of the movie plot, theme, or story (e.g. "A robot learns to love").
        filters: JSON string with narrative filters (optional). Example: '{"min_action": 0.5}'.
            Invalid JSON or a non-object value is ignored.
        limit: Number of movies to return (default 5, max 20). Values outside 1-20 are clamped.

    Returns:
        JSON string containing a list of found movies with their IDs, titles, and relevance scores.
        On failure, a JSON object with a single "error" key is returned instead.
    """
    if encode_func is None or search_func is None:
        return json.dumps({"error": "Search service unavailable"})

    # Enforce the documented bounds; a non-positive limit would otherwise be
    # forwarded as top_k / rerank_top_n.
    limit = max(1, min(limit, _MAX_LIMIT))

    logger.info("Searching for: %s...", query[:50])

    try:
        # 1. Vectorization (uses the logic from modal_app.py).
        # remove_entities=True matters for meaning-based search.
        encoding_result = encode_func.remote(query, remove_entities=True)

        # 2. Search, with optional user filters merged into the narrative features.
        narrative_params = _merge_filters(
            encoding_result["narrative_features"], filters
        )
        results_dict = search_func.remote(
            query_embedding=encoding_result["embedding"],
            query_narrative_features=narrative_params,
            top_k=limit * 2,  # fetch extra candidates for reranking
            rerank_top_n=limit,
        )
        candidates = results_dict.get("results") or []

        # Return a simplified list for the LLM (saves tokens).
        simplified_results = [_simplify_candidate(c) for c in candidates]

        # ensure_ascii=False keeps non-ASCII titles readable instead of \uXXXX.
        return json.dumps(simplified_results, indent=2, ensure_ascii=False)

    except Exception as e:  # tool boundary: report the failure to the client
        # logger.exception records the traceback along with the message.
        logger.exception("Search error: %s", e)
        return json.dumps({"error": str(e)})


if __name__ == "__main__":
    mcp.run()