import textwrap
from typing import List

from langchain_community.llms import GPT4All
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.callbacks import BaseCallbackHandler
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import LlamaCppEmbeddings
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Only needed for the commented-out web-loader / retrieval-chain variants below:
# import bs4
# from langchain.chains import create_retrieval_chain
# from langchain.chains.combine_documents import create_stuff_documents_chain
# from langchain_community.document_loaders import WebBaseLoader

from google.colab import drive
drive.mount('/content/drive')

local_path = "/content/drive/MyDrive/Model/aya-23-8B-Q3_K_S.gguf"  # generator LLM; alt: "/content/drive/MyDrive/Dorna-Llama3-8B-Instruct.Q5_0.gguf"
model_path = "/content/drive/MyDrive/Model/labse.Q3_K_S.gguf"  # embedding model; alt: "/content/drive/MyDrive/labse.Q6_K.gguf"
text_path = "/content/drive/MyDrive/gpt4all/docs/Books/chmn.txt"  # source document to index
index_path = "/content/drive/MyDrive/gpt4all/index_CHEHEL_MAJLESE_NOOR"  # where the FAISS index is saved/loaded

def initialize_embeddings() -> LlamaCppEmbeddings:
    return LlamaCppEmbeddings(model_path=model_path)

def load_documents() -> List:
    loader = TextLoader(text_path)
    return loader.load()

def split_chunks(sources: List) -> List:
    splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=32)
    return splitter.split_documents(sources)

def generate_index(chunks: List, embeddings: LlamaCppEmbeddings) -> FAISS:
    texts = [doc.page_content for doc in chunks]
    metadatas = [doc.metadata for doc in chunks]
    return FAISS.from_texts(texts, embeddings, metadatas=metadatas)
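# Note: FAISS.from_documents(chunks, embeddings) is an equivalent, more direct
# construction; the explicit texts/metadatas split above is kept for clarity.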

class MyCustomHandler(BaseCallbackHandler):
    """Streams generated tokens to stdout as they arrive."""
    def on_llm_new_token(self, token: str, **kwargs) -> None:
        print(token, end="", flush=True)

llm = GPT4All(model=local_path, n_threads=150, streaming=True, verbose=False)
# To stream tokens through the handler, pass callbacks=[MyCustomHandler()] above;
# on a GPU runtime, device='cuda:Tesla T4' can also be passed.
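# A minimal alternative (sketch): callbacks can also be attached per call via
# the runnable config rather than at construction time, e.g.:
# llm.invoke("سلام", config={"callbacks": [MyCustomHandler()]})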
# Alternative: load, chunk and index the contents of a web page instead of the
# local text file, to create the retriever.
# loader = WebBaseLoader(
#     web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
#     bs_kwargs=dict(
#         parse_only=bs4.SoupStrainer(
#             class_=("post-content", "post-title", "post-header")
#         )
#     ),
# )
def format_docs(docs):
    """Join retrieved chunks into one string for the {context} placeholder."""
    return "\n\n".join(doc.page_content for doc in docs)

# docs = loader.load()
# text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
# splits = text_splitter.split_documents(docs)
# vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())
# retriever = vectorstore.as_retriever()

embeddings = initialize_embeddings()

rebuild_index = input('Rebuild index (y/n)? ')
if rebuild_index == 'y':
    sources = load_documents()
    chunks = split_chunks(sources)
    vectorstore = generate_index(chunks, embeddings)
    vectorstore.save_local(index_path)

# allow_dangerous_deserialization is required to load a locally pickled FAISS
# index; only enable it for indexes you built yourself.
index = FAISS.load_local(index_path, embeddings, allow_dangerous_deserialization=True)

retriever = index.as_retriever()
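# Optional sanity check (a minimal sketch; "سلام" is only a placeholder query):
# print(len(embeddings.embed_query("سلام")))   # embedding dimensionality
# for doc in retriever.invoke("سلام"):         # top-k chunks for the query
#     print(doc.page_content[:80])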

# 2. Incorporate the retriever into a question-answering chain.
# The Persian lines instruct the model to answer only in Persian, to give long
# answers, and to say "I don't know" when the answer is not in the context.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Only use the {context} to answer. "
    "لطفاً فقط به زبان فارسی صحبت کن و تمام پاسخ ها را به زبان فارسی بنویس "
    "لطفا پاسخ هایت طولانی باشد "
    "اگر پاسخ سوال را نیافتی بگو نمیدانم"
    "\n\n"
)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)
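# To inspect the rendered prompt (sketch; the placeholder values are arbitrary):
# print(prompt.invoke({"context": "…", "input": "سوال"}).to_messages())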

# Alternative: build the RAG chain with LangChain's prebuilt helpers instead of LCEL:
# question_answer_chain = create_stuff_documents_chain(llm, prompt)
# rag_chain = create_retrieval_chain(retriever, question_answer_chain)
# result = rag_chain.invoke({"input": "What is Task Decomposition?"})


rag_chain_from_docs = (
    {
        "input": lambda x: x["input"],  # input query
        "context": lambda x: format_docs(x["context"]),  # context
    }
    | prompt  # format query and context into prompt
    | llm  # generate response
    | StrOutputParser()  # coerce to string
)

# Pass input query to retriever
retrieve_docs = (lambda x: x["input"]) | retriever

# Below, we chain `.assign` calls. This takes a dict and successively adds the
# keys "context" and "answer", where the value for each key is determined by a
# Runnable operating on all existing keys in the dict.
chain = RunnablePassthrough.assign(context=retrieve_docs).assign(
    answer=rag_chain_from_docs
)
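# One-off invocation (sketch): the result is a dict containing "input", the
# retrieved "context" Documents, and the parsed "answer" string, e.g.:
# result = chain.invoke({"input": "این کتاب درباره چیست؟"})  # "What is this book about?"
# print(result["answer"])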
chat_history = []  # reserved for future multi-turn support; not yet used by the chain
while True:
    # Prompt (Persian): "Ask your question. I am at your service:"
    query = input("پرسش تان را بپرسید. حقیر در خدمتم: ")

    if query.lower() == 'exit':
        break

    response = chain.invoke({"input": query})
    print(textwrap.fill(response['answer'], 80))