#!/usr/bin/env python

import streamlit as st
st.set_page_config(layout='wide')
from streamlit_option_menu import option_menu
from langchain_community.llms import LlamaCpp
from langchain_core.prompts import PromptTemplate
from langchain.chains import LLMChain, RetrievalQA
from langchain_core.callbacks import StreamingStdOutCallbackHandler
from langchain.retrievers import TFIDFRetriever
from langchain.memory import ConversationBufferMemory
from langchain_community.chat_models import ChatLlamaCpp
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma



callbacks = [StreamingStdOutCallbackHandler()]
print("creating llm started")
M_NAME = "finbro-v0.1.0-llama-3-8B-instruct-1m.gguf"
llm = ChatLlamaCpp(
    model_path=M_NAME,
    n_batch=8,
    temperature=0.85,
    max_tokens=256,
    top_p=0.95,
    top_k=10,
    callbacks=callbacks,
    n_ctx=2048,
    verbose=True,  # verbose output is needed for the streaming callbacks
)
print("creating llm ended")

# prompt/chain without conversation memory
template = """You are a financial expert:
### Instruction:
{question}
### Input:
### Response:
"""

prompt1 = PromptTemplate(template=template, input_variables=["question"])
print("prompt template created")
llm_chain_model = LLMChain(prompt=prompt1, llm=llm)
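
# Optional local sanity check for the chain (hedged sketch; the sample question
# below is illustrative and not part of the original script):
# print(llm_chain_model.run("What is the difference between a stock and a bond?"))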

# for retriever (RAG pipeline, currently disabled)


# def format_docs(docs):
#     return "\n\n".join(doc.page_content for doc in docs)

# model_name = "BAAI/bge-base-en-v1.5"
# model_kwargs = {"device":'cpu'}
# encode_kwargs = {'normalize_embeddings':True}

# hf = HuggingFaceEmbeddings(
#     model_name = model_name,
#     model_kwargs = model_kwargs,
#     encode_kwargs = encode_kwargs
# )


# vectorstore = Chroma(
#     collection_name="example_collection",
#     embedding_function=hf,
#     persist_directory="./chroma_langchain_db",  # where to save data locally; remove if not necessary
# )

# retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 6})
# template = """you are the financial ai assistant
# {context}
# Question: {question}
# Helpful Answer:"""
# custom_rag_prompt = PromptTemplate.from_template(template)

# rag_chain = (
#     {"context": retriever | format_docs, "question": RunnablePassthrough()}
#     | custom_rag_prompt
#     | llm
#     | StrOutputParser()
# )
# print("retriever done")

from fastapi import FastAPI

app = FastAPI()

@app.get("/")
async def read_root():
    return {"message": "Hello, Welcome to Finanicail LLM"}


# from fastapi import FastAPI
from pydantic import BaseModel

# app = FastAPI()

# Define a Pydantic model for the request body
class Item(BaseModel):
    question: str


@app.post("/prompt/")
def create_item(item: Item):
    # Item is a Pydantic model, so read the field as an attribute (not .get())
    message_response = llm_chain_model.run(item.question)
    return {"item": item, "message": "LLM response", "response": message_response}