# cinematch-ai / tools /mcp_server_vectordb.py
# dbadeev's picture
# Upload 25 files
# 16ce932 verified
# tools/mcp_server_vectordb.py
from mcp.server.fastmcp import FastMCP
import modal
import logging
import json
from datetime import date
# Logging configuration
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("mcp-vectordb")

# Create the MCP server
mcp = FastMCP("MovieVectorDB")

# Modal function handles (looked up once, at import time).
# If the lookup raises, both handles are set to None; search_plots checks for
# that and answers with a "Search service unavailable" error instead of
# crashing.
try:
    encode_func = modal.Function.from_name("tmdb-project", "encode_user_query")
    search_func = modal.Function.from_name("tmdb-project", "search_similar_movies")
    logger.info("✅ Connected to Modal search functions")
except Exception as exc:
    logger.error(f"❌ Failed to connect to Modal: {exc}")
    encode_func = search_func = None
# Upper bound on `limit`, as promised to callers in the search_plots docstring.
_MAX_LIMIT = 20
# Overviews longer than this are truncated to save tokens for the LLM.
_OVERVIEW_CHARS = 150


def _parse_filters(filters):
    """Decode the optional JSON ``filters`` string into a dict (best effort).

    Invalid JSON, or JSON that is not an object, is logged and ignored so a
    malformed filter never aborts the search itself.
    """
    if not filters:
        return {}
    try:
        parsed = json.loads(filters)
    except (TypeError, ValueError) as exc:  # JSONDecodeError is a ValueError
        logger.warning("Ignoring invalid filters %r: %s", filters, exc)
        return {}
    if not isinstance(parsed, dict):
        logger.warning("Ignoring filters %r: expected a JSON object", filters)
        return {}
    return parsed


def _extract_year(release_date):
    """Return the release year as a string, or "N/A" when the date is missing."""
    if not release_date:
        return "N/A"
    if isinstance(release_date, date):
        return str(release_date.year)
    # Strings (and anything else) are assumed to start with the 4-digit year.
    return str(release_date)[:4]


def _simplify_candidate(candidate):
    """Reduce one search hit to the compact record returned to the LLM."""
    # `or {}` / `or ""` also cover keys that are present but set to None.
    movie = candidate.get("movie_data") or {}
    overview = movie.get("overview") or ""
    if len(overview) > _OVERVIEW_CHARS:
        # Only mark the text as cut when it really was cut.
        overview = overview[:_OVERVIEW_CHARS] + "..."
    return {
        "id": movie.get("id"),
        "title": movie.get("title"),
        "relevance_score": candidate.get("final_score", 0.0),
        "year": _extract_year(movie.get("release_date")),
        "overview": overview,
    }


@mcp.tool()
def search_plots(query: str, filters: str = None, limit: int = 5) -> str:
    """
    Semantic search for movies based on a plot description.

    Args:
        query: Detailed description of the movie plot, theme, or story (e.g. "A robot learns to love").
        filters: JSON string with narrative filters (optional). Example: '{"min_action": 0.5}'.
            Filters that are not a valid JSON object are ignored.
        limit: Number of movies to return (default 5, max 20). Values outside 1..20 are clamped.

    Returns:
        JSON string containing a list of found movies with their IDs, titles,
        relevance scores, release years and shortened overviews. On failure
        the JSON is an object of the form {"error": "<message>"}.
    """
    if encode_func is None or search_func is None:
        return json.dumps({"error": "Search service unavailable"})

    logger.info("Searching for: %s...", query[:50])
    try:
        # Enforce the documented bounds; this also keeps top_k positive.
        limit = max(1, min(limit, _MAX_LIMIT))

        # 1. Vectorize the query (uses the logic from modal_app.py).
        #    remove_entities=True is noted as important for meaning-based search.
        encoding_result = encode_func.remote(query, remove_entities=True)

        # 2. Search. Caller-supplied filters override the narrative features
        #    derived from the query; merge into a new dict so the encoder's
        #    result is not mutated.
        narrative_params = encoding_result["narrative_features"]
        extra_filters = _parse_filters(filters)
        if extra_filters:
            narrative_params = {**(narrative_params or {}), **extra_filters}

        results_dict = search_func.remote(
            query_embedding=encoding_result["embedding"],
            query_narrative_features=narrative_params,
            top_k=limit * 2,  # over-fetch so the reranker has spare candidates
            rerank_top_n=limit,
        )
        candidates = results_dict.get("results") or []

        # Return a simplified list for the LLM (saves tokens).
        simplified_results = [_simplify_candidate(c) for c in candidates]
        return json.dumps(simplified_results, indent=2)
    except Exception as e:
        # Tool boundary: log the full traceback and report the error as JSON
        # instead of letting the exception escape to the MCP runtime.
        logger.exception("Search error: %s", e)
        return json.dumps({"error": str(e)})
# Script entry point: start the MCP server when this file is executed directly.
if __name__ == "__main__":
    mcp.run()