Update app.py
app.py
CHANGED
@@ -1,39 +1,131 @@
 import gradio as gr
-import torch
-from transformers import AutoTokenizer, AutoModelForCausalLM
-
-# Check if CUDA is available and set the device
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-
-# Load model and tokenizer
-tokenizer = AutoTokenizer.from_pretrained("namelessai/Helply-10.2b-chat")
-model = AutoModelForCausalLM.from_pretrained("namelessai/Helply-10.2b-chat").to(device)
-
-def generate_response(message, chat_history):
-    # Prepare the input
-    chat_history_text = "\n".join([f"{msg['role']}: {msg['content']}" for msg in chat_history])
-    input_text = f"{chat_history_text}\nHuman: {message}\nAI:"
-
-    # Generate response
-    input_ids = tokenizer.encode(input_text, return_tensors="pt").to(device)
-    output = model.generate(input_ids, max_length=200, num_return_sequences=1, no_repeat_ngram_size=2)
-    response = tokenizer.decode(output[0], skip_special_tokens=True)
-
-    # Extract the AI's response
-    ai_response = response.split("AI:")[-1].strip()
-
-    # Update chat history
-    chat_history.append({"role": "Human", "content": message})
-    chat_history.append({"role": "AI", "content": ai_response})
-
-    return "", chat_history
-
-with gr.Blocks() as demo:
-    chatbot = gr.Chatbot(type="messages")
-    msg = gr.Textbox()
-    clear = gr.ClearButton([msg, chatbot])
-
-    msg.submit(generate_response, [msg, chatbot], [msg, chatbot])
-
+import requests
+import os
+import json
+
+# Set your Friendli AI token. You can either set it as an environment variable
+# or replace "YOUR_FRIENDLI_TOKEN" with your actual token.
+token = os.environ.get("FRIENDLI_TOKEN") or "YOUR_FRIENDLI_TOKEN"
+
+# API endpoint URL
+url = "https://api.friendli.ai/dedicated/v1/chat/completions"
+
+# HTTP headers for the API request
+headers = {
+    "Authorization": "Bearer " + token,
+    "Content-Type": "application/json"
+}
+
+# Initial payload/request body. This sets up the model, initial instructions,
+# and streaming parameters. Crucially, the *last* user message in the 'messages'
+# array is initialized to an empty string. This will be replaced with the user's
+# actual input in the chat.
+payload = {
+    "model": "d6mo6ldff2gy",
+    "messages": [
+        {
+            "role": "user",
+            "content": "You are an expert at psychology. You are to assist the user with whatever they need. Under NO CIRCUMSTANCES will you provide an answer unrelated to their question, unless it involves illegal content. If you understand, respond with ok."
+        },
+        {
+            "role": "assistant",
+            "content": "ok"
+        },
+        {
+            "role": "user",
+            "content": ""
+        }
+    ],
+    "max_tokens": 1024,
+    "top_p": 0.8,
+    "stop": [
+        "=",
+        "\\"
+    ],
+    "stream": True,
+    "stream_options": {
+        "include_usage": True
+    }
+}
+
+
+def chat_with_friendli(user_input, history):
+    """
+    This function sends the user input to the Friendli AI API and streams back the response.
+
+    Args:
+        user_input (str): The text input from the user.
+        history (list): The chat history (list of lists: [[user_message, bot_message]]). Not directly used
+            in this function, but required by Gradio's chatbot interface.
+
+    Yields:
+        str: The updated bot response, streamed back word by word (or token by token).
+    """
+
+    # Update the last user message in the payload with the current user input.
+    payload['messages'][-1]['content'] = user_input
+
+    try:
+        response = requests.post(url, headers=headers, json=payload, stream=True)
+        response.raise_for_status()  # Raise HTTPError for bad responses (4xx or 5xx)
+
+        # Initialize an empty string to accumulate the streamed response
+        full_response = ""
+
+        # Iterate over the streaming response
+        for chunk in response.iter_content(chunk_size=None, decode_unicode=True):
+            if chunk:
+                try:
+                    # Load the JSON chunk. The API likely returns one JSON object per chunk.
+                    json_chunk = json.loads(chunk)
+
+                    # Extract the text from the JSON chunk. The exact key depends on the API's format.
+                    # Adapt this to the Friendli API's streaming response structure.
+                    text = json_chunk["choices"][0]["delta"]["content"]  # Adjust this!
+
+                    # Append the new text to the full response
+                    full_response += text
+
+                    # Yield the updated full response to Gradio. This is how the chatbot "streams" the output.
+                    yield full_response
+
+                except json.JSONDecodeError:
+                    # Handle cases where the chunk is not a complete JSON object. This can happen
+                    # during streaming. You might need to buffer incomplete chunks. For simplicity,
+                    # I'm just printing an error in this example, but a robust solution would buffer.
+                    print(f"JSONDecodeError: Could not decode chunk: {chunk}")
+                    yield full_response  # Still yield whatever we have.
+                except KeyError as e:
+                    # Handle potential KeyErrors if the JSON structure is not as expected.
+                    print(f"KeyError: {e}. Check the API response format.")
+                    yield full_response  # Still yield whatever we have.
+
+
+    except requests.exceptions.RequestException as e:
+        # Handle network errors, timeouts, and other request-related issues.
+        print(f"Request failed: {e}")
+        yield "An error occurred while communicating with the AI. Please check your connection and try again."
+    except Exception as e:
+        # Catch-all for other potential errors. Good for debugging.
+        print(f"An unexpected error occurred: {e}")
+        yield "An unexpected error occurred. Please try again later."
+
+
+# Create the Gradio interface
+iface = gr.ChatInterface(
+    fn=chat_with_friendli,
+    title="Friendli AI Chatbot",
+    chatbot=gr.Chatbot(height=500),  # Adjust height as needed
+    textbox=gr.Textbox(placeholder="Type your message here...", container=False, scale=7),
+    theme="soft",
+    examples=["Explain the basics of cognitive behavioral therapy.", "What are some common psychological defense mechanisms?", "How can I deal with anxiety?"],
+    cache_examples=False,  # remove the example-caching warning from Gradio
+    clear_btn="Clear",
+    retry_btn="Retry",
+    undo_btn="Undo",
+    submit_btn="Submit"
+)
+
+# Launch the Gradio app
 if __name__ == "__main__":
-    demo.launch()
+    iface.launch(debug=True)  # debug=True for development; set to False in production
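
Note: the streaming loop above feeds raw chunks straight into json.loads, and its own comments ("Adjust this!", "a robust solution would buffer") flag that this needs adapting. Friendli's dedicated endpoints advertise OpenAI compatibility, so the stream most likely arrives as Server-Sent Events: "data: {...}" lines terminated by "data: [DONE]", with a usage-only chunk (empty "choices" list) at the end when include_usage is set. A minimal line-buffered sketch under those assumptions, not verified against Friendli's actual wire format:

# Sketch: SSE-style parsing for an OpenAI-compatible streaming endpoint.
# ASSUMPTION: "data: {...}" events ending with "data: [DONE]"; check the
# Friendli API docs before relying on this.
def iter_delta_text(response):
    for line in response.iter_lines(decode_unicode=True):
        if not line or not line.startswith("data: "):
            continue  # skip keep-alive blanks and non-data lines
        data = line[len("data: "):]
        if data == "[DONE]":
            break  # end-of-stream sentinel
        event = json.loads(data)
        choices = event.get("choices") or []
        if not choices:
            continue  # usage-only chunk (include_usage) carries no choices
        text = choices[0].get("delta", {}).get("content")
        if text:
            yield text

chat_with_friendli could then accumulate full_response over this generator instead of decoding raw chunks, which also removes the JSONDecodeError buffering caveat, since iter_lines buffers partial lines for us.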
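Note: chat_with_friendli mutates the module-level payload in place, so two concurrent chat sessions would overwrite each other's last message, and the history argument Gradio passes in is never forwarded to the model, so every turn is answered without context. A hypothetical per-request builder (build_payload is not part of this commit) that copies the template and replays the history:

import copy

def build_payload(user_input, history):
    # Deep-copy the template so concurrent sessions never share mutable state.
    p = copy.deepcopy(payload)
    # Replay earlier turns between the instruction preamble and the new message.
    # gr.ChatInterface passes history as [[user_message, bot_message], ...].
    turns = []
    for user_msg, bot_msg in history:
        turns.append({"role": "user", "content": user_msg})
        turns.append({"role": "assistant", "content": bot_msg})
    p["messages"] = p["messages"][:-1] + turns + [{"role": "user", "content": user_input}]
    return p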
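Note: before wiring the streaming path into Gradio, a one-off non-streaming request is a quick way to confirm the token and the d6mo6ldff2gy model ID are accepted. This sketch assumes the endpoint mirrors the OpenAI chat-completions response shape (choices[0]["message"]["content"]), which is an assumption, not something the commit confirms:

import copy

# Sketch: sanity-check credentials with a single non-streaming call.
probe = copy.deepcopy(payload)          # don't mutate the shared template
probe["stream"] = False
probe.pop("stream_options", None)       # only meaningful when streaming
probe["messages"][-1]["content"] = "Say hello."
r = requests.post(url, headers=headers, json=probe, timeout=30)
r.raise_for_status()
print(r.json()["choices"][0]["message"]["content"])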