| | import os |
| | import gradio as gr |
| | import json |
| | from huggingface_hub import InferenceClient |
| | import gspread |
| | from google.oauth2 import service_account |
| | from datetime import datetime |
| | import chromadb |
| |
|
| | |
# --- Google Sheets logging setup -------------------------------------------
# OAuth scopes requested for the service account.
scope = ["https://spreadsheets.google.com/feeds", "https://www.googleapis.com/auth/drive"]

# The service-account secret is supplied through environment variables:
# "key1" is used as the private key id, the other "keyN" variables are joined
# with newlines to form the body of the PEM private key.
key1 = os.getenv("key1")

# Names of the environment variables that make up the PEM body, in order.
# NOTE(review): "key23" was read by the previous code but never joined into
# the key; that omission is preserved here -- confirm whether it is intended.
_pem_line_vars = [f"key{i}" for i in range(2, 29) if i != 23]

# Fail with an actionable message instead of the bare "str + None" TypeError
# the string concatenation used to produce when a variable was unset.
_missing_vars = [name for name in _pem_line_vars if os.getenv(name) is None]
if _missing_vars:
    raise RuntimeError(
        "Missing environment variable(s) for the service-account private key: "
        + ", ".join(_missing_vars)
    )

pkey = (
    "-----BEGIN PRIVATE KEY-----\n"
    + "\n".join(os.environ[name] for name in _pem_line_vars)
    + "\n-----END PRIVATE KEY-----\n"
)

# Service-account description in the layout expected by
# service_account.Credentials.from_service_account_info.
json_data = {
    "type": "service_account",
    "project_id": "nestolechatbot",
    "private_key_id": key1,
    "private_key": pkey,
    "client_email": "nestoleservice@nestolechatbot.iam.gserviceaccount.com",
    "client_id": "107457262210035412036",
    "auth_uri": "https://accounts.google.com/o/oauth2/auth",
    "token_uri": "https://oauth2.googleapis.com/token",
    "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
    "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/nestoleservice%40nestolechatbot.iam.gserviceaccount.com",
    "universe_domain": "googleapis.com",
}
creds = service_account.Credentials.from_service_account_info(json_data, scopes=scope)

# Authorised gspread client and the first worksheet of the "nestolechatbot"
# spreadsheet; `sheet` is the target of save_to_sheet().
client = gspread.authorize(creds)
sheet = client.open("nestolechatbot").sheet1
| |
|
def save_to_sheet(date, name, message, IP, dev, header):
    """Append one record to the module-level ``sheet`` worksheet.

    The six arguments become the cells of a single new row, in the order
    they are given.

    Returns:
        A confirmation string that addresses ``name``.
    """
    row = [date, name, message, IP, dev, header]
    sheet.append_row(row)
    confirmation = f"Thanks {name}, your message has been saved!"
    return confirmation
| |
|
# --- Vector store and model clients ----------------------------------------
# Use the first path if it exists on this machine, otherwise fall back to
# the second one.
path = '/Users/thiloid/Desktop/LSKI/ole_nest/Chatbot/LLM/chromaTS'
path = path if os.path.exists(path) else "/home/user/app/chromaTS"
print(path)

# NOTE: this rebinds the module-level name `client`, which earlier in the
# file referred to the gspread client.
client = chromadb.PersistentClient(path=path)

# Start-up diagnostics: print heartbeat, version and known collections.
for probe in (client.heartbeat, client.get_version, client.list_collections):
    print(probe())

from chromadb.utils import embedding_functions

# `default_ef` is created but not used anywhere in this file.
default_ef = embedding_functions.DefaultEmbeddingFunction()
sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(
    model_name="T-Systems-onsite/cross-en-de-roberta-sentence-transformer"
)

# Collection queried by response(), embedded with the sentence-transformer model.
collection = client.get_collection(
    name="chromaTS",
    embedding_function=sentence_transformer_ef,
)

# Hosted text-generation endpoint used to produce the chat answers.
inference_client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
| |
|
def extract_ip_and_device(headers_obj):
    """Pull the client IP and user-agent string out of request headers.

    Args:
        headers_obj: Object whose ``raw`` attribute iterates over
            ``(name, value)`` pairs of ``bytes``. Names are compared
            case-sensitively against ``b'x-forwarded-for'`` and
            ``b'user-agent'``.

    Returns:
        A ``(ip_address, device_info)`` tuple of decoded strings. Either
        element is ``None`` when the corresponding header is absent. If a
        header occurs more than once, the last occurrence wins.
    """
    ip_address = None
    device_info = None

    for header in headers_obj.raw:
        if len(header) != 2:
            # Not a (name, value) pair: report it and keep going.
            print(f"Unexpected header format: {header}")
            continue

        key, value = header

        # Header bytes come from the client and are not guaranteed to be
        # valid UTF-8; replace undecodable bytes instead of raising
        # UnicodeDecodeError and aborting the whole request.
        if key == b'x-forwarded-for':
            ip_address = value.decode('utf-8', errors='replace')
        elif key == b'user-agent':
            device_info = value.decode('utf-8', errors='replace')

    return ip_address, device_info
| |
|
def format_prompt(message, history):
    """Build an ``[INST] ... [/INST]`` prompt from the message and last turn.

    Only the most recent entry of ``history`` is used as context; older
    turns are ignored.

    Args:
        message: Text of the new user message.
        history: Sequence of ``(user_prompt, bot_response)`` pairs, or an
            empty/falsy value when there is no prior conversation.

    Returns:
        The prompt string: the last turn (if usable) followed by the new
        message wrapped in ``[INST] ... [/INST]``.
    """
    print("HISTORY")
    print(history)
    prompt = ""
    if history:
        user_prompt, bot_response = history[-1]
        # A turn without a user message (e.g. a greeting seeded into the
        # chatbot as [None, "..."]) would otherwise be rendered as the
        # literal text "None" inside the instruction; skip such turns.
        if user_prompt is not None:
            prompt += f"[INST] {user_prompt} [/INST] {bot_response}</s> "
    prompt += f"[INST] {message} [/INST]"
    print("Final P")
    print(prompt)
    return prompt
| |
|
def response(request: gr.Request, prompt, history, temperature=0.9, max_new_tokens=500, top_p=0.95, repetition_penalty=1.0):
    """Stream an answer to ``prompt`` grounded in documents from ``collection``.

    Generator: yields the answer text accumulated so far after every
    streamed token. Once the stream ends, the exchange is written to the
    Google Sheet via ``save_to_sheet`` and the final text is yielded again.

    Args:
        request: Incoming request; its ``headers`` are used to obtain the
            client IP and user agent.
        prompt: The user's message.
        history: Prior conversation, passed on to ``format_prompt``.
        temperature: Sampling temperature; values below 0.01 are raised to 0.01.
        max_new_tokens: Forwarded to the text-generation call.
        top_p: Forwarded to the text-generation call.
        repetition_penalty: Forwarded to the text-generation call.

    Yields:
        The answer generated so far, as a string.
    """
    # NOTE(review): leftover from an attempt to capture the page URL; the
    # value is a constant placeholder and is only logged.
    url_extracted = "<div id='url'></div>"
    print(f"Working with URL: {url_extracted}")

    headers = request.headers
    IP, dev = extract_ip_and_device(headers)
    print(headers)

    temperature = max(float(temperature), 1e-2)
    top_p = float(top_p)
    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        seed=42,
    )

    # Retrieve candidate passages for the question (plus the last turn).
    search_prompt = format_prompt(prompt, history)
    results = collection.query(
        query_texts=[search_prompt],
        n_results=60,
    )
    documents = results['documents'][0]

    # `addon` must exist even when fewer than two documents come back;
    # previously it was only assigned inside the `if`, so the next statement
    # raised UnboundLocalError in that case.
    # NOTE(review): the prompt text contains the typo "ausschießlich"; it is
    # left unchanged because it is part of what is sent to the model.
    addon = ""
    if len(documents) > 1:
        addon = "Bitte berücksichtige bei deiner Antwort ausschießlich folgende Auszüge aus unserer Datenbank, sofern sie für die Antwort erforderlich sind. Beantworte die Frage knapp und präzise. Ignoriere unpassende Datenbank-Auszüge OHNE sie zu kommentieren, zu erwähnen oder aufzulisten:\n" + "\n".join(documents)
    system = "Du bist ein deutschsprachiges KI-basiertes Studienberater Assistenzsystem, das zu jedem Anliegen möglichst geeignete Studieninformationen empfiehlt." + addon + "\n\nUser-Anliegen:"
    formatted_prompt = format_prompt(system + "\n" + prompt, history)

    stream = inference_client.text_generation(
        formatted_prompt,
        **generate_kwargs,
        stream=True,
        details=True,
        return_full_text=False,
    )
    output = ""
    # Named `chunk` so the loop variable does not shadow this function's name.
    for chunk in stream:
        output += chunk.token.text
        yield output

    # Log the finished exchange; the user's prompt fills the "name" column.
    now = str(datetime.now())
    save_to_sheet(now, prompt, output, IP, dev, str(headers))
    yield output
| | |
# --- User interface ----------------------------------------------------------
# The chat window starts with a single bot-only turn that greets the user.
welcome_turn = [None, "Herzlich willkommen:)! Ich bin Chätti ein KI-basiertes Studienassistenzsystem, das für jede Anfrage die am besten Studieninformationen empfiehlt.<br>Erzähle mir, was du gerne tust!"]
chat_window = gr.Chatbot(value=[welcome_turn], render_markdown=True)

demo = gr.ChatInterface(
    response,
    chatbot=chat_window,
    title="German Studyhelper Chätti",
)
demo.queue().launch(share=True)

# NOTE(review): if launch() blocks until the server stops, this message is
# only printed at shutdown -- confirm.
print("Interface up and running!")