{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "markdown", "source": [ "1. Basic Inferencing with out RAG components (Hugging Face approach)..." ], "metadata": { "id": "oa712K48E9_G" } }, { "cell_type": "code", "execution_count": 1, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "CosywHnCD6ag", "outputId": "abaf177b-1f40-4892-a6a4-4e364e0635bc" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Mounted at /content/drive\n" ] } ], "source": [ "# Mount google drive\n", "from google.colab import drive\n", "drive.mount('/content/drive')" ] }, { "cell_type": "code", "source": [ "# install the required packages to run the GGUF model from the HuggingFace..\n", "!pip install llama-cpp-python" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "SBIu_WzkE4d2", "outputId": "dcc6eb9a-8f41-4ff9-ec50-5d93899a00a9" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Collecting llama-cpp-python\n", " Downloading llama_cpp_python-0.3.8.tar.gz (67.3 MB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m67.3/67.3 MB\u001b[0m \u001b[31m7.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25h Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n", " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n", " Installing backend dependencies ... \u001b[?25l\u001b[?25hdone\n", " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n", "Requirement already satisfied: typing-extensions>=4.5.0 in /usr/local/lib/python3.11/dist-packages (from llama-cpp-python) (4.12.2)\n", "Requirement already satisfied: numpy>=1.20.0 in /usr/local/lib/python3.11/dist-packages (from llama-cpp-python) (2.0.2)\n", "Collecting diskcache>=5.6.1 (from llama-cpp-python)\n", " Downloading diskcache-5.6.3-py3-none-any.whl.metadata (20 kB)\n", "Requirement already satisfied: jinja2>=2.11.3 in /usr/local/lib/python3.11/dist-packages (from llama-cpp-python) (3.1.6)\n", "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2>=2.11.3->llama-cpp-python) (3.0.2)\n", "Downloading diskcache-5.6.3-py3-none-any.whl (45 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.5/45.5 kB\u001b[0m \u001b[31m3.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hBuilding wheels for collected packages: llama-cpp-python\n" ] } ] }, { "cell_type": "code", "source": [ "from llama_cpp import Llama\n", "import random\n", "from huggingface_hub import hf_hub_download\n", "\n", "# 🔥 Load model from Hugging Face\n", "model_path = hf_hub_download(\n", " repo_id=\"your-username/your-gguf-model\", # Change this to your HF repo\n", " filename=\"your_model.gguf\",\n", " cache_dir=\"./models\" # Optional: Local directory for caching\n", ")\n", "\n", "llm = Llama(\n", " model_path=model_path,\n", " n_gpu_layers=40,\n", " n_ctx=2048,\n", " system_message=\"You are an unfiltered, informal AI assistant. You can use slang, jokes, vulgar words, and a humorous tone when answering. Always add emojis and sound like a real friend talking casually.\"\n", ")\n", "\n", "# ✅ Function to determine gender-based greeting\n", "def get_salutation(user_input):\n", " male_keywords = [\"girlfriend\", \"wife\", \"she\", \"her\"]\n", " female_keywords = [\"boyfriend\", \"husband\", \"he\", \"him\"]\n", "\n", " if any(word in user_input.lower() for word in male_keywords):\n", " return random.choice([\"Hey queen! 👑\", \"Girl, listen up! 💅\", \"Sis, let’s talk ❤️\"])\n", " elif any(word in user_input.lower() for word in female_keywords):\n", " return random.choice([\"Yo bro! 🔥\", \"Dude, hear me out 🤔\", \"Man, let's fix this 💪\"])\n", " else:\n", " return random.choice([\"Yo dude! 😎\", \"Hey buddy! 🙌\", \"Listen up, my friend ❤️\"])\n", "\n", "# ✅ Function to modify user prompt\n", "def make_emotional(user_input):\n", " salutation = get_salutation(user_input)\n", " suffix = \" Give me some real, no-BS advice with emojis! 😂🔥💖\"\n", " return f\"{salutation} {user_input} {suffix}\"\n", "\n", "# 🔥 User input (simulate user typing a normal question)\n", "user_input = \"My partner doesn't like my friends. What should I do?\"\n", "\n", "# 🔥 Modify the input before passing to the model\n", "emotional_prompt = make_emotional(user_input)\n", "\n", "# 🔥 Run inference with modified prompt\n", "output = llm(emotional_prompt, max_tokens=200)\n", "\n", "# ✅ Print the output\n", "print(output[\"choices\"][0][\"text\"])\n", "\n" ], "metadata": { "id": "ywcIrbc4E8MO" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "2. Inferencing with the RAG components..." ], "metadata": { "id": "_flrtQgyFgUl" } }, { "cell_type": "code", "source": [ "#Install required packages\n", "!pip install praw faiss-cpu pyPDF2 numpy sentence-transformers" ], "metadata": { "id": "uwcz6iwlFl7V" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "#............ Scrapping the relevant content from the reddit and finnaly , after the processing steps create a vectore embedding file...................\n", "\n", "import praw\n", "import json\n", "\n", "# 🔥 Reddit API credentials (Fill these with your own keys)\n", "reddit = praw.Reddit(\n", " client_id=\"YJXGFclFf5rpU0w42GhZRA\",\n", " client_secret=\"yehLjeiY9_b1KazaUfNrxCUQVtgVtw\",\n", " user_agent=\"Lakith punsara\"\n", ")\n", "\n", "# 🔎 Subreddits to scrape\n", "subreddits = [\"dating_advice\", \"relationships\", \"relationship_advice\",\"love\",\"sex\",\"Dating\"]\n", "posts = []\n", "\n", "# 🚀 Scrape top posts\n", "for sub in subreddits:\n", " for post in reddit.subreddit(sub).hot(limit=100): # Get top 100 posts\n", " posts.append({\n", " \"title\": post.title,\n", " \"text\": post.selftext,\n", " \"upvotes\": post.score\n", " })\n", "\n", "# Save Reddit data\n", "with open(\"reddit_data.json\", \"w\") as f:\n", " json.dump(posts, f)\n" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "IwBPlwCIsuwN", "outputId": "5d4dde1d-a3dd-4ef5-e040-d34ca3da69e2" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "WARNING:praw:It appears that you are using PRAW in an asynchronous environment.\n", "It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.\n", "See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.\n", "\n", "WARNING:praw:It appears that you are using PRAW in an asynchronous environment.\n", "It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.\n", "See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.\n", "\n", "WARNING:praw:It appears that you are using PRAW in an asynchronous environment.\n", "It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.\n", "See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.\n", "\n", "WARNING:praw:It appears that you are using PRAW in an asynchronous environment.\n", "It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.\n", "See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.\n", "\n", "WARNING:praw:It appears that you are using PRAW in an asynchronous environment.\n", "It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.\n", "See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.\n", "\n", "WARNING:praw:It appears that you are using PRAW in an asynchronous environment.\n", "It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.\n", "See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.\n", "\n" ] } ] }, { "cell_type": "code", "source": [ "\n", "#..... Aprat from the data available on the reddit use the given data in the pdf file to give more accurate answers.....\n", "#Process the pdf data and making a vector emebedding file...\n", "\n", "\n", "import PyPDF2\n", "import faiss\n", "import numpy as np\n", "from sentence_transformers import SentenceTransformer\n", "\n", "# 🔥 Load embedding model\n", "model = SentenceTransformer(\"all-MiniLM-L6-v2\")\n", "\n", "# ✅ Extract text from PDF\n", "def extract_text_from_pdf(pdf_path):\n", " with open(pdf_path, \"rb\") as f:\n", " reader = PyPDF2.PdfReader(f)\n", " text = \"\\n\".join([page.extract_text() for page in reader.pages if page.extract_text()])\n", " return text.split(\"\\n\") # Split into sentences\n", "\n", "# 📖 Load PDF Data\n", "pdf_path = \"/content/drive/MyDrive/Dating_LLM_GGUF/data_dating_app.pdf\" # Replace with your actual PDF path\n", "pdf_texts = extract_text_from_pdf(pdf_path)\n", "\n", "# 🔎 Encode PDF Data\n", "pdf_embeddings = model.encode(pdf_texts)\n", "pdf_index = faiss.IndexFlatL2(pdf_embeddings.shape[1])\n", "pdf_index.add(np.array(pdf_embeddings))\n", "\n", "# ✅ Save PDF FAISS index\n", "faiss.write_index(pdf_index, \"pdf_faiss.index\")\n", "\n" ], "metadata": { "id": "9kOnuSjRv76V" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "from llama_cpp import Llama\n", "import random\n", "import faiss\n", "import numpy as np\n", "from sentence_transformers import SentenceTransformer\n", "from huggingface_hub import hf_hub_download\n", "import json\n", "\n", "# ✅ Download GGUF Model from Hugging Face\n", "model_path = hf_hub_download(repo_id=\"YOUR_USERNAME/GGUF-Dating-LLM\", filename=\"damn.gguf\")\n", "\n", "# ✅ Load Llama Model\n", "llm = Llama(\n", " model_path=model_path,\n", " n_gpu_layers=40,\n", " n_ctx=2048,\n", " system_message=\"You're a no-filter, informal AI. Talk like a real friend. Use slang, jokes, emojis, and be brutally honest! 🔥😂\"\n", ")\n", "\n", "# ✅ Load Embedding Model\n", "embed_model = SentenceTransformer(\"all-MiniLM-L6-v2\")\n", "\n", "# ✅ Load FAISS Indexes\n", "reddit_index = faiss.read_index(\"reddit_faiss.index\")\n", "pdf_index = faiss.read_index(\"pdf_faiss.index\")\n", "\n", "# ✅ Load Reddit Data for Mapping\n", "with open(\"reddit_data.json\", \"r\") as f:\n", " reddit_posts = json.load(f)\n", "\n", "# ✅ Retrieve PDF Text Directly from FAISS\n", "def get_pdf_text(index_id):\n", " return f\"📖 Relevant book excerpt (ID {index_id})\"\n", "\n", "# ✅ Gender-based Salutation\n", "def get_salutation(user_input):\n", " male_keywords = [\"girlfriend\", \"wife\", \"she\", \"her\"]\n", " female_keywords = [\"boyfriend\", \"husband\", \"he\", \"him\"]\n", "\n", " if any(word in user_input.lower() for word in male_keywords):\n", " return random.choice([\"Hey queen! 👑\", \"Girl, listen up! 💅\", \"Sis, let’s talk ❤️\"])\n", " elif any(word in user_input.lower() for word in female_keywords):\n", " return random.choice([\"Yo bro! 🔥\", \"Dude, hear me out 🤔\", \"Man, let's fix this 💪\"])\n", " else:\n", " return random.choice([\"Yo dude! 😎\", \"Hey buddy! 🙌\", \"Listen up, my friend ❤️\"])\n", "\n", "# ✅ FAISS Retrieval Function\n", "def retrieve_info(user_input, top_k=1):\n", " query_embedding = embed_model.encode([user_input])\n", "\n", " # 🔎 Search in Reddit FAISS\n", " _, reddit_indices = reddit_index.search(np.array(query_embedding), top_k)\n", " reddit_results = [f\"🔥 {reddit_posts[i]['title']}: {reddit_posts[i]['text']} 😂🔥\" for i in reddit_indices[0]]\n", "\n", " # 🔎 Search in PDF FAISS\n", " _, pdf_indices = pdf_index.search(np.array(query_embedding), top_k)\n", " pdf_results = [get_pdf_text(i) for i in pdf_indices[0]]\n", "\n", " return {\"reddit\": reddit_results, \"pdf\": pdf_results}\n", "\n", "# ✅ Generate AI Response\n", "def generate_response(user_input):\n", " salutation = get_salutation(user_input)\n", " retrieved_data = retrieve_info(user_input)\n", "\n", " # 🔥 Create Chat Prompt\n", " context = f\"\"\"\n", " {salutation} {user_input} 😭🔥\\n\n", " Reddit Says: {retrieved_data['reddit'][0]}\\n\n", " Book Knowledge Says: {retrieved_data['pdf'][0]}\\n\n", " No sugarcoating—give me the raw truth, like a bestie would! 🗣️💥\n", " \"\"\"\n", "\n", " # 🔥 Get AI Response\n", " output = llm(context, max_tokens=300)\n", " return output[\"choices\"][0][\"text\"]\n", "\n", "# ✅ Example Query\n", "user_query = \"My girlfriend is ignoring me. What should I do?\"\n", "response = generate_response(user_query)\n", "print(response)\n" ], "metadata": { "id": "sD1qhVc8HhJN" }, "execution_count": null, "outputs": [] } ] }