import gradio as gr
import torch
from datasets import load_dataset
from sentence_transformers import SentenceTransformer
from sentence_transformers.util import cos_sim

# --- 1. Select Device, Load Model and Dataset ---
# Prefer the MPS backend, then CUDA, and fall back to the CPU otherwise.
if torch.backends.mps.is_available():
    device = torch.device("mps")
    print("MPS device is available. Using M1/M2 GPU!")
elif torch.cuda.is_available():
    device = torch.device("cuda")
    print("CUDA device is available. Using NVIDIA GPU!")
else:
    device = torch.device("cpu")
    print("No GPU available. Falling back to CPU.")

model_name = 'all-MiniLM-L6-v2'
model = SentenceTransformer(model_name, device=device)

print("Loading dataset...")
ds = load_dataset("pszemraj/goodreads-bookgenres", "default")
df = ds['train'].to_pandas()

# Print the columns so a renamed/missing column is easy to spot in the logs.
print("Available columns:", df.columns.tolist())

# Drop rows with missing descriptions to avoid errors during encoding.
# The index is intentionally NOT relied upon afterwards: rows are looked up
# by position (iloc), which stays aligned with `book_embeddings` below.
df = df.dropna(subset=['Description'])

print("Dataset loaded and cleaned. Head of DataFrame:")
print(df.head())

# --- 2. Generate Book Embeddings ---
print("Generating book embeddings...")
# Encode all descriptions in one call; row i of `book_embeddings`
# corresponds to positional row i of `df`.
book_descriptions = df['Description'].tolist()
book_embeddings = model.encode(
    book_descriptions,
    convert_to_tensor=True,
    show_progress_bar=True,
)
print("Embeddings generated.")


# --- 3. Define Recommendation Function ---
def recommend_books(query, top_k=5):
    """Return the ``top_k`` books whose descriptions best match ``query``.

    Args:
        query: Free-text topic, genre, or title to search for.
        top_k: Maximum number of books to return. Values larger than the
            number of embedded books are clamped to that number; values
            below 1 yield an empty result.

    Returns:
        A pandas DataFrame with the ``Book`` and ``Description`` columns,
        ordered from most to least similar. The frame is empty when
        ``query`` is ``None`` or blank.
    """
    columns = ['Book', 'Description']

    # Nothing meaningful to search for: return an empty frame with the
    # expected columns instead of arbitrary "matches".
    if query is None or not str(query).strip():
        return df.iloc[0:0][columns]

    # torch.topk raises when k exceeds the number of candidates.
    k = max(0, min(int(top_k), book_embeddings.shape[0]))

    query_embedding = model.encode(query, convert_to_tensor=True)

    # One query against all books -> a (1, n_books) similarity matrix.
    cosine_scores = cos_sim(query_embedding, book_embeddings)

    # Take row 0 explicitly rather than squeeze(): squeeze() would collapse
    # a single result (k == 1) to a 0-d tensor, and iloc would then return
    # a Series instead of a DataFrame.
    top_indices = torch.topk(cosine_scores, k=k).indices[0]

    # Move indices off the accelerator and convert to plain Python ints
    # before handing them to pandas.
    positions = top_indices.cpu().tolist()

    return df.iloc[positions][columns]


# --- 4. Define Gradio Interface and Launch ---
print("Launching Gradio interface...")
gr_interface = gr.Interface(
    fn=recommend_books,
    inputs=gr.Textbox(
        lines=2,
        placeholder="Enter a book topic, genre, or title...",
    ),
    outputs=gr.Dataframe(headers=["Book", "Description"]),
    title="Book Recommendation System",
    description="Get book recommendations based on your query.",
    examples=[
        "A thrilling detective story",
        "A heartwarming novel about friendship",
        "Science fiction about space travel",
    ],
)

gr_interface.launch(share=True)