{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": []
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "source": [
        "1. Basic Inferencing with out RAG components (Hugging Face approach)..."
      ],
      "metadata": {
        "id": "oa712K48E9_G"
      }
    },
    {
      "cell_type": "code",
      "execution_count": 1,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "CosywHnCD6ag",
        "outputId": "abaf177b-1f40-4892-a6a4-4e364e0635bc"
      },
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Mounted at /content/drive\n"
          ]
        }
      ],
      "source": [
        "# Mount google drive\n",
        "from google.colab import drive\n",
        "drive.mount('/content/drive')"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# install the required packages to  run the GGUF model from the HuggingFace..\n",
        "!pip install llama-cpp-python"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "SBIu_WzkE4d2",
        "outputId": "dcc6eb9a-8f41-4ff9-ec50-5d93899a00a9"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Collecting llama-cpp-python\n",
            "  Downloading llama_cpp_python-0.3.8.tar.gz (67.3 MB)\n",
            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m67.3/67.3 MB\u001b[0m \u001b[31m7.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25h  Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
            "  Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
            "  Installing backend dependencies ... \u001b[?25l\u001b[?25hdone\n",
            "  Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
            "Requirement already satisfied: typing-extensions>=4.5.0 in /usr/local/lib/python3.11/dist-packages (from llama-cpp-python) (4.12.2)\n",
            "Requirement already satisfied: numpy>=1.20.0 in /usr/local/lib/python3.11/dist-packages (from llama-cpp-python) (2.0.2)\n",
            "Collecting diskcache>=5.6.1 (from llama-cpp-python)\n",
            "  Downloading diskcache-5.6.3-py3-none-any.whl.metadata (20 kB)\n",
            "Requirement already satisfied: jinja2>=2.11.3 in /usr/local/lib/python3.11/dist-packages (from llama-cpp-python) (3.1.6)\n",
            "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2>=2.11.3->llama-cpp-python) (3.0.2)\n",
            "Downloading diskcache-5.6.3-py3-none-any.whl (45 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m45.5/45.5 kB\u001b[0m \u001b[31m3.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hBuilding wheels for collected packages: llama-cpp-python\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "from llama_cpp import Llama\n",
        "import random\n",
        "from huggingface_hub import hf_hub_download\n",
        "\n",
        "# 🔥 Load model from Hugging Face\n",
        "model_path = hf_hub_download(\n",
        "    repo_id=\"your-username/your-gguf-model\",  # Change this to your HF repo\n",
        "    filename=\"your_model.gguf\",\n",
        "    cache_dir=\"./models\"  # Optional: Local directory for caching\n",
        ")\n",
        "\n",
        "llm = Llama(\n",
        "    model_path=model_path,\n",
        "    n_gpu_layers=40,\n",
        "    n_ctx=2048,\n",
        "    system_message=\"You are an unfiltered, informal AI assistant. You can use slang, jokes, vulgar words, and a humorous tone when answering. Always add emojis and sound like a real friend talking casually.\"\n",
        ")\n",
        "\n",
        "# ✅ Function to determine gender-based greeting\n",
        "def get_salutation(user_input):\n",
        "    male_keywords = [\"girlfriend\", \"wife\", \"she\", \"her\"]\n",
        "    female_keywords = [\"boyfriend\", \"husband\", \"he\", \"him\"]\n",
        "\n",
        "    if any(word in user_input.lower() for word in male_keywords):\n",
        "        return random.choice([\"Hey queen! 👑\", \"Girl, listen up! 💅\", \"Sis, let’s talk ❤️\"])\n",
        "    elif any(word in user_input.lower() for word in female_keywords):\n",
        "        return random.choice([\"Yo bro! 🔥\", \"Dude, hear me out 🤔\", \"Man, let's fix this 💪\"])\n",
        "    else:\n",
        "        return random.choice([\"Yo dude! 😎\", \"Hey buddy! 🙌\", \"Listen up, my friend ❤️\"])\n",
        "\n",
        "# ✅ Function to modify user prompt\n",
        "def make_emotional(user_input):\n",
        "    salutation = get_salutation(user_input)\n",
        "    suffix = \" Give me some real, no-BS advice with emojis! 😂🔥💖\"\n",
        "    return f\"{salutation} {user_input} {suffix}\"\n",
        "\n",
        "# 🔥 User input (simulate user typing a normal question)\n",
        "user_input = \"My partner doesn't like my friends. What should I do?\"\n",
        "\n",
        "# 🔥 Modify the input before passing to the model\n",
        "emotional_prompt = make_emotional(user_input)\n",
        "\n",
        "# 🔥 Run inference with modified prompt\n",
        "output = llm(emotional_prompt, max_tokens=200)\n",
        "\n",
        "# ✅ Print the output\n",
        "print(output[\"choices\"][0][\"text\"])\n",
        "\n"
      ],
      "metadata": {
        "id": "ywcIrbc4E8MO"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "2. Inferencing with the RAG components..."
      ],
      "metadata": {
        "id": "_flrtQgyFgUl"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "#Install required packages\n",
        "!pip install praw faiss-cpu pyPDF2 numpy sentence-transformers"
      ],
      "metadata": {
        "id": "uwcz6iwlFl7V"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "#............ Scrapping the relevant content from the reddit and finnaly , after the processing steps create a  vectore embedding file...................\n",
        "\n",
        "import praw\n",
        "import json\n",
        "\n",
        "# 🔥 Reddit API credentials (Fill these with your own keys)\n",
        "reddit = praw.Reddit(\n",
        "    client_id=\"YJXGFclFf5rpU0w42GhZRA\",\n",
        "    client_secret=\"yehLjeiY9_b1KazaUfNrxCUQVtgVtw\",\n",
        "    user_agent=\"Lakith punsara\"\n",
        ")\n",
        "\n",
        "# 🔎 Subreddits to scrape\n",
        "subreddits = [\"dating_advice\", \"relationships\", \"relationship_advice\",\"love\",\"sex\",\"Dating\"]\n",
        "posts = []\n",
        "\n",
        "# 🚀 Scrape top posts\n",
        "for sub in subreddits:\n",
        "    for post in reddit.subreddit(sub).hot(limit=100):  # Get top 100 posts\n",
        "        posts.append({\n",
        "            \"title\": post.title,\n",
        "            \"text\": post.selftext,\n",
        "            \"upvotes\": post.score\n",
        "        })\n",
        "\n",
        "# Save Reddit data\n",
        "with open(\"reddit_data.json\", \"w\") as f:\n",
        "    json.dump(posts, f)\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "IwBPlwCIsuwN",
        "outputId": "5d4dde1d-a3dd-4ef5-e040-d34ca3da69e2"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "WARNING:praw:It appears that you are using PRAW in an asynchronous environment.\n",
            "It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.\n",
            "See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.\n",
            "\n",
            "WARNING:praw:It appears that you are using PRAW in an asynchronous environment.\n",
            "It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.\n",
            "See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.\n",
            "\n",
            "WARNING:praw:It appears that you are using PRAW in an asynchronous environment.\n",
            "It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.\n",
            "See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.\n",
            "\n",
            "WARNING:praw:It appears that you are using PRAW in an asynchronous environment.\n",
            "It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.\n",
            "See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.\n",
            "\n",
            "WARNING:praw:It appears that you are using PRAW in an asynchronous environment.\n",
            "It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.\n",
            "See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.\n",
            "\n",
            "WARNING:praw:It appears that you are using PRAW in an asynchronous environment.\n",
            "It is strongly recommended to use Async PRAW: https://asyncpraw.readthedocs.io.\n",
            "See https://praw.readthedocs.io/en/latest/getting_started/multiple_instances.html#discord-bots-and-asynchronous-environments for more info.\n",
            "\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "\n",
        "#..... Aprat from the data available on the reddit use the given data in the pdf file to give  more accurate answers.....\n",
        "#Process the pdf data and making a vector emebedding file...\n",
        "\n",
        "\n",
        "import PyPDF2\n",
        "import faiss\n",
        "import numpy as np\n",
        "from sentence_transformers import SentenceTransformer\n",
        "\n",
        "# 🔥 Load embedding model\n",
        "model = SentenceTransformer(\"all-MiniLM-L6-v2\")\n",
        "\n",
        "# ✅ Extract text from PDF\n",
        "def extract_text_from_pdf(pdf_path):\n",
        "    with open(pdf_path, \"rb\") as f:\n",
        "        reader = PyPDF2.PdfReader(f)\n",
        "        text = \"\\n\".join([page.extract_text() for page in reader.pages if page.extract_text()])\n",
        "    return text.split(\"\\n\")  # Split into sentences\n",
        "\n",
        "# 📖 Load PDF Data\n",
        "pdf_path = \"/content/drive/MyDrive/Dating_LLM_GGUF/data_dating_app.pdf\"  # Replace with your actual PDF path\n",
        "pdf_texts = extract_text_from_pdf(pdf_path)\n",
        "\n",
        "# 🔎 Encode PDF Data\n",
        "pdf_embeddings = model.encode(pdf_texts)\n",
        "pdf_index = faiss.IndexFlatL2(pdf_embeddings.shape[1])\n",
        "pdf_index.add(np.array(pdf_embeddings))\n",
        "\n",
        "# ✅ Save PDF FAISS index\n",
        "faiss.write_index(pdf_index, \"pdf_faiss.index\")\n",
        "\n"
      ],
      "metadata": {
        "id": "9kOnuSjRv76V"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "from llama_cpp import Llama\n",
        "import random\n",
        "import faiss\n",
        "import numpy as np\n",
        "from sentence_transformers import SentenceTransformer\n",
        "from huggingface_hub import hf_hub_download\n",
        "import json\n",
        "\n",
        "# ✅ Download GGUF Model from Hugging Face\n",
        "model_path = hf_hub_download(repo_id=\"YOUR_USERNAME/GGUF-Dating-LLM\", filename=\"damn.gguf\")\n",
        "\n",
        "# ✅ Load Llama Model\n",
        "llm = Llama(\n",
        "    model_path=model_path,\n",
        "    n_gpu_layers=40,\n",
        "    n_ctx=2048,\n",
        "    system_message=\"You're a no-filter, informal AI. Talk like a real friend. Use slang, jokes, emojis, and be brutally honest! 🔥😂\"\n",
        ")\n",
        "\n",
        "# ✅ Load Embedding Model\n",
        "embed_model = SentenceTransformer(\"all-MiniLM-L6-v2\")\n",
        "\n",
        "# ✅ Load FAISS Indexes\n",
        "reddit_index = faiss.read_index(\"reddit_faiss.index\")\n",
        "pdf_index = faiss.read_index(\"pdf_faiss.index\")\n",
        "\n",
        "# ✅ Load Reddit Data for Mapping\n",
        "with open(\"reddit_data.json\", \"r\") as f:\n",
        "    reddit_posts = json.load(f)\n",
        "\n",
        "# ✅ Retrieve PDF Text Directly from FAISS\n",
        "def get_pdf_text(index_id):\n",
        "    return f\"📖 Relevant book excerpt (ID {index_id})\"\n",
        "\n",
        "# ✅ Gender-based Salutation\n",
        "def get_salutation(user_input):\n",
        "    male_keywords = [\"girlfriend\", \"wife\", \"she\", \"her\"]\n",
        "    female_keywords = [\"boyfriend\", \"husband\", \"he\", \"him\"]\n",
        "\n",
        "    if any(word in user_input.lower() for word in male_keywords):\n",
        "        return random.choice([\"Hey queen! 👑\", \"Girl, listen up! 💅\", \"Sis, let’s talk ❤️\"])\n",
        "    elif any(word in user_input.lower() for word in female_keywords):\n",
        "        return random.choice([\"Yo bro! 🔥\", \"Dude, hear me out 🤔\", \"Man, let's fix this 💪\"])\n",
        "    else:\n",
        "        return random.choice([\"Yo dude! 😎\", \"Hey buddy! 🙌\", \"Listen up, my friend ❤️\"])\n",
        "\n",
        "# ✅ FAISS Retrieval Function\n",
        "def retrieve_info(user_input, top_k=1):\n",
        "    query_embedding = embed_model.encode([user_input])\n",
        "\n",
        "    # 🔎 Search in Reddit FAISS\n",
        "    _, reddit_indices = reddit_index.search(np.array(query_embedding), top_k)\n",
        "    reddit_results = [f\"🔥 {reddit_posts[i]['title']}: {reddit_posts[i]['text']} 😂🔥\" for i in reddit_indices[0]]\n",
        "\n",
        "    # 🔎 Search in PDF FAISS\n",
        "    _, pdf_indices = pdf_index.search(np.array(query_embedding), top_k)\n",
        "    pdf_results = [get_pdf_text(i) for i in pdf_indices[0]]\n",
        "\n",
        "    return {\"reddit\": reddit_results, \"pdf\": pdf_results}\n",
        "\n",
        "# ✅ Generate AI Response\n",
        "def generate_response(user_input):\n",
        "    salutation = get_salutation(user_input)\n",
        "    retrieved_data = retrieve_info(user_input)\n",
        "\n",
        "    # 🔥 Create Chat Prompt\n",
        "    context = f\"\"\"\n",
        "    {salutation} {user_input} 😭🔥\\n\n",
        "    Reddit Says: {retrieved_data['reddit'][0]}\\n\n",
        "    Book Knowledge Says: {retrieved_data['pdf'][0]}\\n\n",
        "    No sugarcoating—give me the raw truth, like a bestie would! 🗣️💥\n",
        "    \"\"\"\n",
        "\n",
        "    # 🔥 Get AI Response\n",
        "    output = llm(context, max_tokens=300)\n",
        "    return output[\"choices\"][0][\"text\"]\n",
        "\n",
        "# ✅ Example Query\n",
        "user_query = \"My girlfriend is ignoring me. What should I do?\"\n",
        "response = generate_response(user_query)\n",
        "print(response)\n"
      ],
      "metadata": {
        "id": "sD1qhVc8HhJN"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
}