import os
import textwrap
from typing import List, Tuple

import bs4
from google.colab import drive
from langchain.chains import create_retrieval_chain
# from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_community.document_loaders import TextLoader
# from langchain_community.document_loaders import WebBaseLoader
from langchain_community.embeddings import LlamaCppEmbeddings
from langchain_community.llms import GPT4All
from langchain_community.vectorstores import FAISS
from langchain_core.callbacks import BaseCallbackHandler
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_text_splitters import RecursiveCharacterTextSplitter

drive.mount('/content/drive')

# Chat model (GGUF), embedding model, source text, and index location on Google Drive.
local_path = "/content/drive/MyDrive/Model/aya-23-8B-Q3_K_S.gguf"  # "/content/drive/MyDrive/Dorna-Llama3-8B-Instruct.Q5_0.gguf"
model_path = "/content/drive/MyDrive/Model/labse.Q3_K_S.gguf"  # "/content/drive/MyDrive/labse.Q6_K.gguf"
text_path = "/content/drive/MyDrive/gpt4all/docs/Books/chmn.txt"
index_path = "/content/drive/MyDrive/gpt4all/index_CHEHEL_MAJLESE_NOOR"


def initialize_embeddings() -> LlamaCppEmbeddings:
    return LlamaCppEmbeddings(model_path=model_path)


def load_documents() -> List:
    loader = TextLoader(text_path)
    return loader.load()


def split_chunks(sources: List) -> List:
    splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=32)
    return splitter.split_documents(sources)


def generate_index(chunks: List, embeddings: LlamaCppEmbeddings) -> FAISS:
    texts = [doc.page_content for doc in chunks]
    metadatas = [doc.metadata for doc in chunks]
    return FAISS.from_texts(texts, embeddings, metadatas=metadatas)


class MyCustomHandler(BaseCallbackHandler):
    def on_llm_new_token(self, token: str, **kwargs) -> None:
        # Print each token as it streams in, without a trailing newline
        # (the original `print(token),` had a stray Python-2-style comma).
        print(token, end="", flush=True)


llm = GPT4All(
    model=local_path,
    n_threads=150,
    streaming=True,
    verbose=False,
)  # callbacks=[MyCustomHandler()], device='cuda:Tesla T4'

# 1. Load, chunk and index the contents of the blog to create a retriever.
# loader = WebBaseLoader(
#     web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
#     bs_kwargs=dict(
#         parse_only=bs4.SoupStrainer(
#             class_=("post-content", "post-title", "post-header")
#         )
#     ),
# )


def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)


# docs = loader.load()
# text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
# splits = text_splitter.split_documents(docs)
# vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())
# retriever = vectorstore.as_retriever()

# ---------- attention: build or load the FAISS index ----------
embeddings = initialize_embeddings()

rebuild_index = input('Rebuild Index (y/n)? ')
if rebuild_index == 'y':
    # start = time.time()
    sources = load_documents()
    chunks = split_chunks(sources)
    vectorstore = generate_index(chunks, embeddings)
    vectorstore.save_local(index_path)
    # end = time.time()
    # print('Elapsed time to build index: ' + str(end - start))

index = FAISS.load_local(index_path, embeddings, allow_dangerous_deserialization=True)
retriever = index.as_retriever()
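# Optional sanity check (a sketch, not in the original script): pull a few
# chunks straight from the retriever to confirm the index loaded correctly.
# `as_retriever()` returns a Runnable, so `.invoke` accepts a query string on
# recent langchain-core versions; the query below is only a placeholder.
# sample_docs = retriever.invoke("نمونه پرسش")
# print(f"retrieved {len(sample_docs)} chunks; first chunk starts with:")
# print(sample_docs[0].page_content[:200])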
" "Only use the {context} to answer: " "لطفاً فقط به زبان فارسی صحبت کن و تمام پاسخ ها را به زبان فارسی بنویس " "لطفا پاسخ هایت طولانی باشد " "اگر پاسخ سوال را نیافتی بگو نمیدانم" "\n\n""" ) prompt = ChatPromptTemplate.from_messages( [ ("system", system_prompt), ("human", "{input}"), ] ) ##question_answer_chain = create_stuff_documents_chain(llm, prompt) ##rag_chain = create_retrieval_chain(retriver , question_answer_chain) # retriever #result = rag_chain.invoke({"input": "What is Task Decomposition?"}) # second edit rag_chain_from_docs = ( { "input": lambda x: x["input"], # input query "context": lambda x: format_docs(x["context"]), # context } | prompt # format query and context into prompt | llm # generate response | StrOutputParser() # coerce to string ) # Pass input query to retriever retrieve_docs = (lambda x: x["input"]) | retriver # Below, we chain `.assign` calls. This takes a dict and successively # adds keys-- "context" and "answer"-- where the value for each key # is determined by a Runnable. The Runnable operates on all existing # keys in the dict. chain = RunnablePassthrough.assign(context=retrieve_docs).assign( answer=rag_chain_from_docs ) chat_history = [] while True: query = input("پرسش تان را بپرسید. حقیر در خدمتم: ") if query.lower() == 'exit': break response = chain.invoke({"input": query}) print(textwrap.fill(response['answer'],80))