# Spaces:
# Sleeping
# Sleeping
# --- Imports and runtime configuration --------------------------------------
# Standard library
import logging
import os
from getpass import getpass
from pprint import pprint

# Third-party
import PyPDF2
import streamlit as st
from dotenv import load_dotenv
from haystack import Document, Pipeline
from haystack.document_stores import InMemoryDocumentStore
from haystack.nodes import (
    AnswerParser,
    BM25Retriever,
    PreProcessor,
    PromptModel,
    PromptNode,
    PromptTemplate,
)

# Load environment variables (e.g. HF_KEY) from a local .env file so the
# os.getenv() calls below can see them.
load_dotenv()

# DEBUG-level logging for the whole app.
# NOTE: the duplicate `import logging` from the original file was removed.
logging.basicConfig(level=logging.DEBUG)
# Function to extract text from a PDF
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page of the PDF at *pdf_path*.

    Pages for which PyPDF2 cannot extract text (``extract_text()`` returns
    ``None``) contribute an empty string instead of raising.

    :param pdf_path: filesystem path to the PDF file.
    :return: all extracted page text joined into a single string.
    """
    with open(pdf_path, "rb") as pdf_file:
        pdf_reader = PyPDF2.PdfReader(pdf_file)
        # Iterate the pages directly instead of indexing via range(len(...)),
        # and build the string with one join instead of repeated `+=`
        # concatenation (which is quadratic in the worst case).
        return "".join(page.extract_text() or "" for page in pdf_reader.pages)
# --- Ingest the source PDF into an in-memory BM25 index ---------------------
PDF_PATH = "Data/MR. MPROFY.pdf"

extracted_text = extract_text_from_pdf(PDF_PATH)
if not extracted_text:
    raise ValueError("No text extracted from PDF.")

# Wrap the raw text in a Haystack Document and index it in the store.
source_document = Document(content=extracted_text, meta={"name": "MR. MPROFY"})
document_store = InMemoryDocumentStore(use_bm25=True)
document_store.write_documents([source_document])

# Sparse keyword retriever over the indexed document (best 2 matches).
retriever = BM25Retriever(document_store=document_store, top_k=2)
# Define QA Template
# The retrieved documents are substituted into {join(documents)} and the
# user's question into {query}; AnswerParser turns the raw model output
# into Haystack Answer objects.
qa_template = PromptTemplate(
    prompt="""
Hi, I'm Mprofier, your friendly AI assistant. I'm here to provide direct and concise answers to your specific questions.
I won’t ask any follow-up questions myself.
If I can't find the answer in the provided context, I'll simply state that I don't have enough information to answer.
Context: {join(documents)};
Question: {query}
Answer:
""",
    output_parser=AnswerParser()
)
# --- LLM prompt node and retrieval-augmented generation pipeline ------------
# Hugging Face API token, read from the environment (populated by load_dotenv).
HF_TOKEN = os.getenv('HF_KEY')
if not HF_TOKEN:
    # Surface the misconfiguration immediately instead of letting the first
    # query fail with an opaque HTTP auth error from the inference API.
    logging.warning("HF_KEY is not set; Hugging Face API calls may fail to authenticate.")

# Initialize Prompt Node
prompt_node = PromptNode(
    model_name_or_path="mistralai/Mixtral-8x7B-Instruct-v0.1",
    api_key=HF_TOKEN,
    default_prompt_template=qa_template,
    max_length=500,  # maximum number of tokens to generate
    model_kwargs={"model_max_length": 5000}  # context length passed to the model
)

# Build Pipeline: Query -> BM25 retriever -> prompt node (generation).
rag_pipeline = Pipeline()
rag_pipeline.add_node(component=retriever, name="retriever", inputs=["Query"])
rag_pipeline.add_node(component=prompt_node, name="prompt_node", inputs=["retriever"])
# Streamlit Function for Handling Input and Displaying Output
def run_streamlit_app():
    """Render the Streamlit UI: a question box, a button, and the answer.

    Runs the module-level ``rag_pipeline`` on the entered question and
    displays the first answer, or a fallback message when no answer is found.
    """
    st.title("Mprofier - AI Assistant")
    query_text = st.text_input("Enter your question:")
    if st.button("Get Answer"):
        # Don't hit the remote model API for a blank/whitespace-only query.
        if not query_text.strip():
            st.warning("Please enter a question.")
            return
        response = rag_pipeline.run(query=query_text)
        # Use .get() so a missing "answers" key doesn't raise KeyError;
        # an empty list falls through to the fallback message as before.
        answers = response.get("answers") if response else None
        answer = answers[0].answer if answers else "No answer found."
        st.write(answer)


# Start the Streamlit application only when executed as a script.
if __name__ == "__main__":
    run_streamlit_app()