import os
from typing import List, Tuple
from langchain_community.llms import GPT4All
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
import bs4
import textwrap
from langchain.chains import create_retrieval_chain
#from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_community.vectorstores import FAISS
#from langchain_community.document_loaders import WebBaseLoader
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.embeddings import LlamaCppEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.callbacks import BaseCallbackHandler
from langchain_community.document_loaders import TextLoader
from google.colab import drive
drive.mount('/content/drive')
# Paths on Google Drive: generation model, embedding model, source text, and
# the FAISS index directory.
local_path = "/content/drive/MyDrive/Model/aya-23-8B-Q3_K_S.gguf"  # alt: "/content/drive/MyDrive/Dorna-Llama3-8B-Instruct.Q5_0.gguf"
model_path = "/content/drive/MyDrive/Model/labse.Q3_K_S.gguf"  # alt: "/content/drive/MyDrive/labse.Q6_K.gguf"
text_path = "/content/drive/MyDrive/gpt4all/docs/Books/chmn.txt"
index_path = "/content/drive/MyDrive/gpt4all/index_CHEHEL_MAJLESE_NOOR"
def initialize_embeddings() -> LlamaCppEmbeddings:
    # Load the GGUF embedding model through llama.cpp.
    return LlamaCppEmbeddings(model_path=model_path)

def load_documents() -> List:
    # Read the source text file into LangChain Document objects.
    loader = TextLoader(text_path)
    return loader.load()
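# Illustrative only (not part of the original flow): the embedding model can
# be sanity-checked directly; embed_query returns one vector as a list of
# floats:
#   emb = initialize_embeddings()
#   vec = emb.embed_query("سلام")
#   print(len(vec))  # embedding dimensionality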
def split_chunks(sources: List) -> List:
    # 512-character chunks with a 32-character overlap so that context is
    # preserved across chunk boundaries.
    splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=32)
    return splitter.split_documents(sources)
def generate_index(chunks: List, embeddings: LlamaCppEmbeddings) -> FAISS:
    # Embed every chunk and build an in-memory FAISS index, keeping each
    # chunk's metadata alongside its text.
    texts = [doc.page_content for doc in chunks]
    metadatas = [doc.metadata for doc in chunks]
    return FAISS.from_texts(texts, embeddings, metadatas=metadatas)
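# Illustrative only: a freshly built index can be probed before wiring the
# chain, e.g. to eyeball retrieval quality:
#   idx = generate_index(split_chunks(load_documents()), initialize_embeddings())
#   for doc in idx.similarity_search("نمونه پرسش", k=4):
#       print(doc.page_content[:80])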
class MyCustomHandler(BaseCallbackHandler):
    # Stream each generated token to stdout as it arrives.
    def on_llm_new_token(self, token: str, **kwargs) -> None:
        print(token, end="", flush=True)

llm = GPT4All(model=local_path, n_threads=150, streaming=True, verbose=False)
# To stream tokens, pass callbacks=[MyCustomHandler()] above; to run on GPU,
# add device='cuda:Tesla T4'.
# 1. (Alternative) Load, chunk and index the contents of a blog to create a retriever.
# loader = WebBaseLoader(
#     web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
#     bs_kwargs=dict(
#         parse_only=bs4.SoupStrainer(
#             class_=("post-content", "post-title", "post-header")
#         )
#     ),
# )
def format_docs(docs):
    # Join the retrieved documents into a single context string for the prompt.
    return "\n\n".join(doc.page_content for doc in docs)
# docs = loader.load()
# text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
# splits = text_splitter.split_documents(docs)
# vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())
# retriever = vectorstore.as_retriever()
embeddings = initialize_embeddings()

rebuild_index = input('Rebuild index (y/n)? ')
if rebuild_index == 'y':
    # start = time.time()
    sources = load_documents()
    chunks = split_chunks(sources)
    vectorstore = generate_index(chunks, embeddings)
    vectorstore.save_local(index_path)
    # end = time.time()
    # print('Elapsed time to build index: ' + str(end - start))

# allow_dangerous_deserialization is needed because the index was pickled
# locally by this script; only load indexes you built yourself.
index = FAISS.load_local(index_path, embeddings, allow_dangerous_deserialization=True)
retriever = index.as_retriever()
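# Illustrative only: the retriever is itself a Runnable and can be invoked on
# its own; it returns a list of Document objects ranked by similarity:
#   docs = retriever.invoke("نمونه پرسش")
#   print(len(docs), docs[0].page_content[:80])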
# 2. Incorporate the retriever into a question-answering chain.
system_prompt = (
    "You are an assistant for question-answering tasks. "
    "Only use the {context} to answer: "
    # Persian: "Please speak only in Persian and write all answers in Persian."
    "لطفاً فقط به زبان فارسی صحبت کن و تمام پاسخ ها را به زبان فارسی بنویس "
    # Persian: "Please make your answers long."
    "لطفا پاسخ هایت طولانی باشد "
    # Persian: "If you cannot find the answer, say 'I don't know'."
    "اگر پاسخ سوال را نیافتی بگو نمیدانم"
    "\n\n"
)
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)
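# Illustrative only: the template fills {context} and {input} and yields chat
# messages (a SystemMessage followed by a HumanMessage):
#   msgs = prompt.invoke({"context": "...", "input": "..."}).to_messages()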
## question_answer_chain = create_stuff_documents_chain(llm, prompt)
## rag_chain = create_retrieval_chain(retriever, question_answer_chain)
# result = rag_chain.invoke({"input": "What is Task Decomposition?"})

# Second approach: build the RAG chain manually with LCEL instead of
# create_retrieval_chain.
rag_chain_from_docs = (
    {
        "input": lambda x: x["input"],                   # pass the query through
        "context": lambda x: format_docs(x["context"]),  # flatten retrieved docs
    }
    | prompt             # format query and context into the prompt
    | llm                # generate the response
    | StrOutputParser()  # coerce the output to a string
)
# Pass the input query to the retriever to fetch the supporting documents.
retrieve_docs = (lambda x: x["input"]) | retriever
# Below, we chain `.assign` calls. This takes a dict and successively adds the
# keys "context" and "answer", where the value for each key is produced by a
# Runnable. Each Runnable sees all keys already present in the dict.
chain = RunnablePassthrough.assign(context=retrieve_docs).assign(
    answer=rag_chain_from_docs
)
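# The chain's output is therefore a dict that keeps the original "input", adds
# the retrieved "context" documents, and adds the generated "answer" string:
#   out = chain.invoke({"input": "..."})
#   out["context"]  # list of retrieved Documents
#   out["answer"]   # model response as a string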
chat_history = []  # reserved for multi-turn support; not used below
while True:
    # Persian prompt: "Ask your question. I am humbly at your service:"
    query = input("پرسش تان را بپرسید. حقیر در خدمتم: ")
    if query.lower() == 'exit':
        break
    response = chain.invoke({"input": query})
    print(textwrap.fill(response['answer'], 80))