MohsenParsa committed
Commit 5c0755e · verified · 1 Parent(s): c2aee8a

Update app.py

Files changed (1): app.py +134 -60
app.py CHANGED
@@ -1,63 +1,137 @@
-import gradio as gr
-from huggingface_hub import InferenceClient
-
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
-client = InferenceClient("bartowski/aya-23-35B-GGUF")
-
-
-def respond(
-    message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
-    messages = [{"role": "system", "content": system_message}]
-
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
-
-    messages.append({"role": "user", "content": message})
-
-    response = ""
-
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
-
-        response += token
-        yield response
-
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
-demo = gr.ChatInterface(
-    respond,
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
-    ],
+import os
+from typing import List, Tuple
+from langchain_community.llms import GPT4All
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.runnables import RunnablePassthrough
+import bs4
+import textwrap
+from langchain.chains import create_retrieval_chain
+#from langchain.chains.combine_documents import create_stuff_documents_chain
+from langchain_community.vectorstores import FAISS
+#from langchain_community.document_loaders import WebBaseLoader
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_community.embeddings import LlamaCppEmbeddings
+from langchain_text_splitters import RecursiveCharacterTextSplitter
+
+from langchain_core.callbacks import BaseCallbackHandler
+from langchain_community.document_loaders import TextLoader
+
+# Model, source-text and index locations on a mounted Google Drive (Colab);
+# previously used alternatives are kept in the trailing comments.
+local_path = "/content/drive/MyDrive/Model/aya-23-8B-Q3_K_S.gguf"  # "/content/drive/MyDrive/Dorna-Llama3-8B-Instruct.Q5_0.gguf"
+model_path = "/content/drive/MyDrive/Model/labse.Q3_K_S.gguf"  # "/content/drive/MyDrive/labse.Q6_K.gguf"
+text_path = "/content/drive/MyDrive/gpt4all/docs/Books/chmn.txt"
+index_path = "/content/drive/MyDrive/gpt4all/index_CHEHEL_MAJLESE_NOOR"
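+
+# Sketch (an assumption, not part of this commit): `os` is imported above but
+# never used; one way to put it to work is letting environment variables
+# override the hard-coded Colab paths, e.g.:
+#   local_path = os.environ.get("LLM_MODEL_PATH", local_path)
+#   index_path = os.environ.get("INDEX_PATH", index_path)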
+
+def initialize_embeddings() -> LlamaCppEmbeddings:
+    # Embeddings come from a separate GGUF model (LaBSE) served via llama.cpp.
+    return LlamaCppEmbeddings(model_path=model_path)
+
+def load_documents() -> List:
+    loader = TextLoader(text_path)
+    return loader.load()
+
+def split_chunks(sources: List) -> List:
+    # 512-character chunks with a 32-character overlap between neighbours.
+    chunks = []
+    splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=32)
+    for chunk in splitter.split_documents(sources):
+        chunks.append(chunk)
+    return chunks
+
+def generate_index(chunks: List, embeddings: LlamaCppEmbeddings) -> FAISS:
+    # Embed every chunk and build an in-memory FAISS index over the vectors.
+    texts = [doc.page_content for doc in chunks]
+    metadatas = [doc.metadata for doc in chunks]
+    return FAISS.from_texts(texts, embeddings, metadatas=metadatas)
+
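+# Sketch (not part of the commit): the three helpers compose into the indexing
+# pipeline that the rebuild branch further below actually runs:
+#   index = generate_index(split_chunks(load_documents()), initialize_embeddings())
+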
+class MyCustomHandler(BaseCallbackHandler):
+    # Optional streaming hook: print each generated token as it arrives.
+    def on_llm_new_token(self, token: str, **kwargs) -> None:
+        print(token, end="", flush=True)
+
+llm = GPT4All(model=local_path, n_threads=150, streaming=True, verbose=False)
+# Disabled options: callbacks=[MyCustomHandler()], device='cuda:Tesla T4'
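+
+# Sketch (an assumption, not part of the commit): to actually stream tokens
+# through the handler above, re-enable the callbacks argument when
+# constructing the model:
+#   llm = GPT4All(model=local_path, callbacks=[MyCustomHandler()], streaming=True)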
+
+# 1. Load, chunk and index the contents of the blog to create a retriever
+#    (web-based variant, kept disabled):
+# loader = WebBaseLoader(
+#     web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
+#     bs_kwargs=dict(
+#         parse_only=bs4.SoupStrainer(
+#             class_=("post-content", "post-title", "post-header")
+#         )
+#     ),
+# )
+
+def format_docs(docs):
+    # Concatenate retrieved chunks into a single context string.
+    return "\n\n".join(doc.page_content for doc in docs)
+
+# More leftovers from the web-based variant, kept disabled:
+# docs = loader.load()
+# text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
+# splits = text_splitter.split_documents(docs)
+# vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())
+# retriever = vectorstore.as_retriever()  # attention
+
+embeddings = initialize_embeddings()
+rebuild_index = input('Rebuild Index (y/n)?')
+if rebuild_index == 'y':
+    # start = time.time()
+    sources = load_documents()
+    chunks = split_chunks(sources)
+    vectorstore = generate_index(chunks, embeddings)
+    vectorstore.save_local(index_path)
+    # end = time.time()
+    # elapsed = end - start
+    # print('Elapsed time to build index: ' + str(elapsed))
+
+index = FAISS.load_local(index_path, embeddings, allow_dangerous_deserialization=True)
+
+retriever = index.as_retriever()
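+
+# Sketch (not part of the commit): the retriever is a Runnable that maps a
+# query string to the most similar indexed chunks, e.g.:
+#   docs = retriever.invoke("some question")  # -> List[Document]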
+
+# 2. Incorporate the retriever into a question-answering chain.
+# The Persian lines instruct the model: "Please speak only in Persian and
+# write all the answers in Persian", "Please let your answers be long", and
+# "If you do not find the answer to the question, say: I do not know".
+system_prompt = (
+    "You are an assistant for question-answering tasks. "
+    "Only use the {context} to answer: "
+    "لطفاً فقط به زبان فارسی صحبت کن و تمام پاسخ ها را به زبان فارسی بنویس "
+    "لطفا پاسخ هایت طولانی باشد "
+    "اگر پاسخ سوال را نیافتی بگو نمیدانم"
+    "\n\n"
+)
+
+prompt = ChatPromptTemplate.from_messages(
+    [
+        ("system", system_prompt),
+        ("human", "{input}"),
+    ]
 )
 
-if __name__ == "__main__":
-    demo.launch()
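+
+# Sketch (an assumption, not part of the commit): since ChatPromptTemplate is
+# a Runnable, the assembled prompt can be previewed before wiring the chain:
+#   prompt.invoke({"context": "…", "input": "…"})  # -> ChatPromptValue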
+
+## question_answer_chain = create_stuff_documents_chain(llm, prompt)
+## rag_chain = create_retrieval_chain(retriever, question_answer_chain)
+# result = rag_chain.invoke({"input": "What is Task Decomposition?"})
+
+# Second edit: assemble the RAG chain manually instead of using
+# create_retrieval_chain.
+
+rag_chain_from_docs = (
+    {
+        "input": lambda x: x["input"],  # input query
+        "context": lambda x: format_docs(x["context"]),  # retrieved context
+    }
+    | prompt  # format query and context into the prompt
+    | llm  # generate a response
+    | StrOutputParser()  # coerce the output to a string
+)
+
+# Pass the input query to the retriever; the plain lambda is coerced into a
+# Runnable by the `|` operator.
+retrieve_docs = (lambda x: x["input"]) | retriever
+
+# Below, we chain `.assign` calls. This takes a dict and successively adds
+# keys -- "context" and "answer" -- where the value for each key is computed
+# by a Runnable that operates on all existing keys in the dict.
+chain = RunnablePassthrough.assign(context=retrieve_docs).assign(
+    answer=rag_chain_from_docs
+)
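+
+# Sketch (not part of the commit): the chain passes the input through and adds
+# the assigned keys, so the result looks roughly like:
+#   chain.invoke({"input": q})
+#   # -> {"input": q, "context": [Document, ...], "answer": "..."}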
+
+chat_history = []  # collected here but not yet fed back into the chain
+while True:
+    # Persian prompt: "Ask your question. I am humbly at your service:"
+    query = input("پرسش تان را بپرسید. حقیر در خدمتم: ")
+
+    if query.lower() == 'exit':
+        break
+
+    response = chain.invoke({"input": query})
+    print(textwrap.fill(response['answer'], 80))