Spaces:
Running
Running
| # tools/mcp_server_vectordb.py | |
| from mcp.server.fastmcp import FastMCP | |
| import modal | |
| import logging | |
| import json | |
| from datetime import date | |
# Logging configuration for this server process.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("mcp-vectordb")

# The MCP server instance that is started in the __main__ guard.
mcp = FastMCP("MovieVectorDB")

# Resolve the two deployed Modal functions once, at import time. If anything
# goes wrong, both handles end up as None; search_plots checks for that and
# reports the service as unavailable.
# NOTE(review): depending on the Modal client version, Function.from_name may
# be lazy and not contact the server here, in which case a missing deployment
# would only surface on the first .remote() call — confirm.
_MODAL_APP = "tmdb-project"
try:
    encode_func, search_func = (
        modal.Function.from_name(_MODAL_APP, function_name)
        for function_name in ("encode_user_query", "search_similar_movies")
    )
    logger.info("✅ Connected to Modal search functions")
except Exception as exc:
    logger.error(f"❌ Failed to connect to Modal: {exc}")
    encode_func = search_func = None
_MAX_LIMIT = 20  # upper bound promised by the search_plots docstring
_OVERVIEW_CHARS = 150  # overview text is cut to this length to save LLM tokens


def _extract_year(release_date) -> str:
    """Return the release year as a string, or "N/A" when no date is given."""
    if not release_date:
        return "N/A"
    if isinstance(release_date, date):
        return str(release_date.year)
    # Strings, and any other type via str(), are presumed to start with the
    # year (e.g. "2014-11-05"); slicing a shorter value returns it unchanged.
    return str(release_date)[:4]


def _merge_filters(narrative_params, filters):
    """Overlay the caller's JSON filters on the encoder's narrative features.

    Filtering stays best-effort: malformed JSON or a non-object payload is
    logged and ignored, and the encoder's features are returned unchanged.
    """
    if not filters:
        return narrative_params
    try:
        extra_filters = json.loads(filters)
    except (TypeError, ValueError) as exc:  # JSONDecodeError is a ValueError
        logger.warning("Ignoring malformed filters %r: %s", filters, exc)
        return narrative_params
    if not isinstance(extra_filters, dict):
        logger.warning("Ignoring filters that are not a JSON object: %r", filters)
        return narrative_params
    # Build a new dict instead of mutating the encoder's result in place.
    return {**(narrative_params or {}), **extra_filters}


def _simplify_candidate(candidate) -> dict:
    """Reduce one search hit to the compact fields returned to the caller."""
    # "or" guards against keys that are present but hold None.
    movie = candidate.get("movie_data") or {}
    overview = movie.get("overview") or ""
    if len(overview) > _OVERVIEW_CHARS:
        overview = overview[:_OVERVIEW_CHARS] + "..."
    return {
        "id": movie.get("id"),
        "title": movie.get("title"),
        "relevance_score": candidate.get("final_score", 0.0),
        "year": _extract_year(movie.get("release_date")),
        "overview": overview,
    }


# NOTE(review): no `@mcp.tool()` registration is visible for this function in
# this view of the file — confirm it is registered with the server elsewhere.
def search_plots(query: str, filters: str = None, limit: int = 5) -> str:
    """
    Semantic search for movies based on a plot description.

    Args:
        query: Detailed description of the movie plot, theme, or story
            (e.g. "A robot learns to love").
        filters: JSON string with narrative filters (optional).
            Example: '{"min_action": 0.5}'. Invalid JSON or a non-object
            value is logged and ignored.
        limit: Number of movies to return (default 5, clamped to 1..20).

    Returns:
        JSON string containing a list of found movies with their IDs, titles,
        years, truncated overviews, and relevance scores. On failure, a JSON
        object of the form {"error": "<message>"} is returned instead.
    """
    if not encode_func or not search_func:
        return json.dumps({"error": "Search service unavailable"})

    logger.info("Searching for: %s...", query[:50])
    try:
        limit = max(1, min(int(limit), _MAX_LIMIT))

        # 1. Vectorize the query. remove_entities=True is kept deliberately:
        #    the original author notes it matters for meaning-based search.
        encoding_result = encode_func.remote(query, remove_entities=True)

        # 2. Search, with any caller-supplied filters layered on top of the
        #    narrative features produced by the encoder.
        narrative_params = _merge_filters(
            encoding_result["narrative_features"], filters
        )
        results_dict = search_func.remote(
            query_embedding=encoding_result["embedding"],
            query_narrative_features=narrative_params,
            top_k=limit * 2,  # fetch extra candidates for the reranker
            rerank_top_n=limit,
        )
        candidates = (results_dict or {}).get("results") or []

        # Return a trimmed-down list for the LLM to save tokens.
        simplified_results = [_simplify_candidate(c) for c in candidates]
        return json.dumps(simplified_results, indent=2)
    except Exception as e:
        # Tool boundary: log the full traceback, return the error as JSON.
        logger.exception("Search error: %s", e)
        return json.dumps({"error": str(e)})
# Script entry point: start the MCP server only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    mcp.run()