The Space is failing with a build error. Below is the revised app code:
```python
import os
import logging
from tempfile import NamedTemporaryFile

import gradio as gr
import requests
import speech_recognition as sr

# Logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Environment variables
HF_TOKEN = os.environ.get("HF_TOKEN")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
GROQ_MODEL = os.getenv("GROQ_MODEL", "mixtral-8x7b-32768")
GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"

headers = {
    "Authorization": f"Bearer {GROQ_API_KEY}",
    "Content-Type": "application/json",
}

# Emotion descriptions
emotion_options = {
    "neutral": "Neutral or balanced mood",
    "positive": "Generally positive or optimistic",
    "happy": "Feeling joy or happiness",
    "excited": "Feeling enthusiastic or energetic",
    "sad": "Feeling down or unhappy",
    "angry": "Feeling frustrated or irritated",
    "negative": "Generally negative or pessimistic",
    "anxious": "Feeling worried or nervous",
}

# Conversation history, stored as plain "User: ..." / "AI: ..." strings
conversation_history = []

# Transcribe audio with Google's free Web Speech API via SpeechRecognition
def transcribe_audio(audio_path):
    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(audio_path) as source:
            audio_data = recognizer.record(source)
        return recognizer.recognize_google(audio_data)
    except Exception as e:
        logger.error(f"Audio transcription failed: {e}")
        return ""

# Generate a Groq chat completion from the system prompt and recent history
def get_groq_response(prompt, history):
    messages = [{"role": "system", "content": prompt}]
    for msg in history:
        if msg.startswith("User: "):
            messages.append({"role": "user", "content": msg[6:]})
        elif msg.startswith("AI: "):
            messages.append({"role": "assistant", "content": msg[4:]})
    payload = {
        "model": GROQ_MODEL,
        "messages": messages,
        "temperature": 0.7,
        "max_tokens": 1024,
    }
    try:
        response = requests.post(GROQ_API_URL, headers=headers, json=payload, timeout=60)
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"]
    except Exception as e:
        logger.error(f"Groq API error: {e}")
        return "Error contacting AI."

# Generate TTS with Yarngpt via the Hugging Face Inference API
def generate_speech_and_upload(text):
    try:
        hf_model_id = "saheedniyi/Yarngpt"
        inference_url = f"https://api-inference.huggingface.co/models/{hf_model_id}"
        # Local name so we don't shadow the module-level Groq headers
        hf_headers = {"Authorization": f"Bearer {HF_TOKEN}"}
        payload = {"inputs": text}
        response = requests.post(inference_url, headers=hf_headers, json=payload, timeout=120)
        if response.status_code != 200:
            logger.error(f"Hugging Face TTS API error: {response.text}")
            return None
        temp_file = NamedTemporaryFile(delete=False, suffix=".wav")
        with open(temp_file.name, "wb") as f:
            f.write(response.content)
        return temp_file.name
    except Exception as e:
        logger.error(f"Hugging Face TTS error: {e}")
        return None

# Main handler: audio input (if any) takes precedence over typed text
def chat_with_ai(audio, text_input, emotion, history):
    global conversation_history
    user_text = text_input or ""
    if audio:
        transcription = transcribe_audio(audio)
        if transcription:
            user_text = transcription
        else:
            return "Couldn't understand the audio.", None, history
    if not user_text.strip():
        return "No input provided.", None, history
    conversation_history.append(f"User: {user_text}")
    recent_messages = conversation_history[-20:]
    prompt = (
        f"You are an empathetic AI assistant. The user is feeling {emotion} "
        f"({emotion_options.get(emotion, 'Unknown')}). Respond supportively."
    )
    ai_response = get_groq_response(prompt, recent_messages)
    conversation_history.append(f"AI: {ai_response}")
    audio_path = generate_speech_and_upload(ai_response)
    return ai_response, audio_path, (history or []) + [[user_text, ai_response]]

def clear_conversation():
    global conversation_history
    conversation_history = []
    return [], None, "", "Conversation cleared."

# Gradio UI
iface = gr.Blocks()
with iface:
    gr.Markdown("# Mind AID AI Assistant")
    gr.Markdown("Talk or type to the AI assistant. Your emotional state helps tailor the response.")
    with gr.Row():
        with gr.Column(scale=3):
            emotion = gr.Dropdown(label="Your emotion?", choices=list(emotion_options.keys()), value="neutral")
            emotion_description = gr.Markdown("**Current mood:** Neutral or balanced mood")
            def update_emotion_desc(em):
                return f"**Current mood:** {emotion_options.get(em, 'Unknown')}"
            emotion.change(fn=update_emotion_desc, inputs=[emotion], outputs=[emotion_description])
        with gr.Column(scale=1):
            clear_btn = gr.Button("Clear Conversation")
            status = gr.Textbox(label="Status")
    chat_history = gr.Chatbot(label="Chat History", height=300)
    with gr.Row():
        text_input = gr.Textbox(label="Type your message", lines=2)
        audio_input = gr.Audio(label="Or speak", type="filepath", sources=["microphone"])
    output_audio = gr.Audio(label="AI Voice Response")
    submit_btn = gr.Button("Send", variant="primary")
    submit_btn.click(
        fn=chat_with_ai,
        inputs=[audio_input, text_input, emotion, chat_history],
        outputs=[status, output_audio, chat_history],
    )
    text_input.submit(
        fn=chat_with_ai,
        inputs=[audio_input, text_input, emotion, chat_history],
        outputs=[status, output_audio, chat_history],
    )
    clear_btn.click(
        fn=clear_conversation,
        inputs=[],
        outputs=[chat_history, audio_input, text_input, status],
    )

iface.launch()
```
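Since the build fails before the app even runs, the first thing to check is the dependency list. A minimal `requirements.txt` covering the imports above might look like this (a sketch, assuming your Space has no other dependencies):

```text
gradio
requests
SpeechRecognition
```

Note that the PyPI package name is `SpeechRecognition`, even though the module is imported as `speech_recognition`; listing the wrong name here is a common cause of Spaces build errors.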
This is the complete revised code with Yarngpt integrated for text-to-speech output via the Hugging Face Inference API. Make sure your HF_TOKEN is set correctly in your environment (as a Space secret) and that it has access to the model saheedniyi/Yarngpt. Let me know if you need help deploying this.
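To verify the token and model before deploying, you can hit the same Inference API endpoint the app uses from your own machine. A minimal sketch, assuming HF_TOKEN is exported in your local shell:

```python
import os
import requests

# Same endpoint and payload shape as generate_speech_and_upload() above.
token = os.environ["HF_TOKEN"]  # assumes the token is exported locally
url = "https://api-inference.huggingface.co/models/saheedniyi/Yarngpt"

resp = requests.post(
    url,
    headers={"Authorization": f"Bearer {token}"},
    json={"inputs": "Hello from Mind AID."},
    timeout=120,
)

print(resp.status_code)                  # 200 means the token and model are reachable
print(resp.headers.get("content-type"))  # expect an audio/* type on success
if resp.status_code != 200:
    print(resp.text)                     # e.g. 401 = bad token, 503 = model loading
```

A 503 here usually just means the model is cold-starting on the serverless API, so retry after a short wait; a persistent 404 may mean the model isn't served through the serverless Inference API at all, in which case you'd need a dedicated Inference Endpoint or to run the model locally.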