| | |
"""
Kiswahili Voice Agent for Hugging Face Spaces
Natural conversational Kiswahili voice-to-voice assistant
"""
| |
|
| | import gradio as gr |
| | import os |
| | from datetime import datetime |
| | import json |
| |
|
| | |
| | try: |
| | from gtts import gTTS |
| | HAS_GTTS = True |
| | except ImportError: |
| | HAS_GTTS = False |
| |
|
| | try: |
| | import speech_recognition as sr |
| | HAS_SR = True |
| | except ImportError: |
| | HAS_SR = False |
| |
|
| | try: |
| | import requests |
| | HAS_REQUESTS = True |
| | except ImportError: |
| | HAS_REQUESTS = False |
| |
|
| | |
# Conversation log kept at module level: one dict per exchange with the keys
# "timestamp", "user" and "assistant".
# NOTE(review): being module-level, this is shared by everything handled by
# the same process — confirm that is acceptable for a multi-user deployment.
conversation_history = []

# NOTE(review): never assigned anything but None in the visible code.
conversation_id = None

# Persona instructions for the assistant, written in Swahili.
# NOTE(review): nothing in the visible part of this file reads this prompt.
SYSTEM_PROMPT = """Wewe ni Manus, msaidizi wa sauti wa Kiswahili ambaye ana tabia nzuri na karimu.
Unazungumza Kiswahili safi na asilia, na unafahamu utamaduni wa Kiswahili.
Katika kila jibu, jaribu kuuliza swali la mfuatano ili kuendelea na mazungumzo.
Jibu kwa ufupi lakini kwa maana - kwa kawaida 1-2 sentensi.
Kila jibu lazima liwe na swali au kauli inayokamatia mazungumzo."""

# Every user-facing string (Swahili), keyed by purpose.  Each value is a
# single-line literal; the emoji prefixes are stored as real UTF-8 characters.
UI_STRINGS = {
    "title": "🎙️ Manus - Msaidizi wa Sauti wa Kiswahili",
    "subtitle": "Mazungumzo ya asilia kwa Kiswahili",
    "instruction": "Bonyeza kurekodi, sema kitu kwa Kiswahili, kisha bonyeza kuacha.",
    "status_recording": "🔴 Inasikiliza...",
    "status_processing": "⚙️ Inachakata...",
    "status_ready": "✅ Tayari",
    "status_error": "❌ Hitilafu",
    "user_label": "Wewe:",
    "assistant_label": "Manus:",
    "reset_button": "🔄 Anza Upya",
    "reset_confirm": "Mazungumzo yamefutwa. Karibu tena!",
    "error_audio": "Haiwezekani kusoma sauti. Tafadhali jaribu tena.",
    "error_process": "Haiwezekani kuchakata sauti. Tafadhali jaribu tena.",
    "welcome": "Habari! Naitwa Manus. Karibu sana! Unaweza kusema kitu chochote kwa Kiswahili, na nitakujibu.",
}
| |
|
def transcribe_audio(audio_file):
    """Turn a recorded audio file into Swahili text.

    The file is read with ``sr.AudioFile`` and passed to
    ``Recognizer.recognize_google`` with the language set to ``sw-TZ``.

    Args:
        audio_file: Audio source accepted by ``sr.AudioFile``.

    Returns:
        The recognized text.  Failures are not raised; they come back as a
        Swahili message string instead (module missing, speech not
        understood, service unreachable, or ``"Hitilafu: <details>"`` for
        anything else).
    """
    if not HAS_SR:
        return "Haiwezekani kusoma sauti - moduli haipo"

    try:
        engine = sr.Recognizer()
        with sr.AudioFile(audio_file) as clip:
            samples = engine.record(clip)
        outcome = engine.recognize_google(samples, language="sw-TZ")
    except sr.UnknownValueError:
        # The recognizer could not make sense of the audio.
        outcome = "Haiwezekani kuelewa sauti. Tafadhali jaribu tena."
    except sr.RequestError:
        # The recognition request itself failed.
        outcome = "Haiwezekani kuunganisha na huduma ya mtandao."
    except Exception as exc:
        outcome = "Hitilafu: " + str(exc)
    return outcome
| |
|
def generate_response(user_text):
    """Pick a Swahili reply for ``user_text`` with keyword rules.

    The text is lower-cased and scanned for a fixed list of keywords in
    order; the reply of the first keyword found as a substring is returned.
    When none matches, one of the generic follow-up prompts is chosen at
    random.

    Args:
        user_text: What the user said.

    Returns:
        The reply string.
    """
    import random

    lowered = user_text.lower()

    # (keyword, reply) pairs; order matters because the first hit wins.
    keyword_replies = (
        ("habari", "Habari nzuri! Niko sawa. Wewe je, uko sawa?"),
        ("jina", "Naitwa Manus, msaidizi wako wa sauti. Jina lako nani?"),
        ("asante", "Karibu sana! Kuna kitu kingine ninachoweza kukusaidia?"),
        ("pole", "Pole pole! Kila kitu kitakuwa sawa. Unaweza kusema nini kinachokukosesha?"),
        ("ndiyo", "Nzuri! Unaweza kusema zaidi?"),
        ("hapana", "Sawa. Kuna kitu kingine?"),
    )
    matched = next(
        (reply for word, reply in keyword_replies if word in lowered),
        None,
    )
    if matched is not None:
        return matched

    # No keyword present: keep the conversation going with a generic prompt.
    fallback_replies = (
        "Hiyo ni kitu kizuri! Unaweza kusema zaidi kuhusu hilo?",
        "Nimeelewa. Na kisha nini?",
        "Sawa! Hiyo ni kitu muhimu. Unaweza kueneza?",
        "Nzuri sana! Unaweza kusema kitu kingine?",
        "Hiyo ni interesting! Unaweza kuniambia zaidi?",
    )
    return random.choice(fallback_replies)
| |
|
def text_to_speech_kiswahili(text):
    """Synthesize Swahili speech for ``text`` and return the MP3 file path.

    Each call writes to its own temporary file, so overlapping requests can
    no longer overwrite one another's audio.

    Args:
        text: The sentence(s) to speak.

    Returns:
        Path of the generated ``.mp3`` file, or ``None`` when ``text`` is
        empty/blank, gTTS is not installed, or synthesis fails.
    """
    import os
    import tempfile

    # Nothing to say: skip the synthesis call altogether.
    if not text or not text.strip():
        return None
    if not HAS_GTTS:
        return None

    audio_file = None
    try:
        # delete=False: the file has to outlive this function so the caller
        # can serve it.  NOTE(review): nothing here removes it afterwards.
        with tempfile.NamedTemporaryFile(
            prefix="response_", suffix=".mp3", delete=False
        ) as handle:
            audio_file = handle.name
        gTTS(text=text, lang='sw', slow=False).save(audio_file)
        return audio_file
    except Exception as e:
        print(f"TTS Error: {e}")
        # Do not leave an empty or partial file behind after a failure.
        if audio_file is not None:
            try:
                os.remove(audio_file)
            except OSError:
                pass
        return None
| |
|
def process_voice_input(audio_input):
    """Run one conversational turn for a recorded utterance.

    The recording is transcribed, a reply is generated and synthesized, the
    exchange is appended to the module-level ``conversation_history``, and
    the whole history is rendered as Markdown.

    Args:
        audio_input: Path of the recorded audio file, or ``None``/empty when
            nothing was recorded.

    Returns:
        A 4-tuple ``(status, conversation, reply_audio, recorder_update)``:
        the status text, the Markdown for the conversation panel (or an
        error message), the path of the spoken reply (``None`` on error or
        when no audio could be produced), and a ``gr.update`` that empties
        the recorder.
    """
    global conversation_history

    # An audio component is emptied with None; an empty string is not a
    # valid audio value.
    clear_recorder = gr.update(value=None)

    def _failure(message):
        # Common shape of every error result: error status, no reply audio.
        return (UI_STRINGS["status_error"], message, None, clear_recorder)

    if not audio_input:
        return _failure(UI_STRINGS["error_audio"])

    try:
        user_text = transcribe_audio(audio_input)

        # transcribe_audio reports failures as messages that *begin* with one
        # of these words.  Checking the prefix instead of searching the whole
        # transcript keeps ordinary speech that merely contains them from
        # being mistaken for an error.
        if user_text.startswith(("Hitilafu", "Haiwezekani")):
            return _failure(user_text)

        assistant_response = generate_response(user_text)
        audio_response = text_to_speech_kiswahili(assistant_response)

        conversation_history.append({
            "timestamp": datetime.now().isoformat(),
            "user": user_text,
            "assistant": assistant_response
        })

        # Render the full history, oldest exchange first.
        conversation_text = "".join(
            f"\n**{UI_STRINGS['user_label']}** {turn['user']}\n"
            f"**{UI_STRINGS['assistant_label']}** {turn['assistant']}\n"
            for turn in conversation_history
        )

        return (
            UI_STRINGS["status_ready"],
            conversation_text,
            audio_response,
            clear_recorder,
        )

    except Exception as e:
        # Last-resort boundary: show the problem in the UI instead of
        # letting the event handler crash.
        return _failure(f"{UI_STRINGS['status_error']}: {str(e)}")
| |
|
def reset_conversation():
    """Discard the conversation history and return the UI to its idle state.

    Returns:
        A 4-tuple ``(status, conversation, reply_audio, recorder_update)``:
        the "ready" status, the reset confirmation message, ``None`` to
        clear the reply audio, and a ``gr.update`` that empties the recorder.
    """
    global conversation_history
    conversation_history = []
    return (
        UI_STRINGS["status_ready"],
        UI_STRINGS["reset_confirm"],
        None,
        # An audio component is emptied with None; an empty string is not a
        # valid audio value.
        gr.update(value=None),
    )
| |
|
| | |
# Gradio interface: recorder and buttons on the left, conversation and spoken
# reply on the right.
with gr.Blocks(title=UI_STRINGS["title"], theme=gr.themes.Soft()) as demo:
    gr.Markdown(f"# {UI_STRINGS['title']}")
    gr.Markdown(f"### {UI_STRINGS['subtitle']}")
    gr.Markdown(f"> {UI_STRINGS['instruction']}")

    with gr.Row():
        with gr.Column(scale=1):
            # Read-only status line.
            status_display = gr.Textbox(
                value=UI_STRINGS["status_ready"],
                label="Hali",
                interactive=False,
                lines=1
            )

            # Microphone recorder; handlers receive the recording as a path.
            audio_input = gr.Audio(
                label="🎤 Rekodi Sauti",
                type="filepath",
                sources=["microphone"]
            )

            # Manual submit.
            process_btn = gr.Button(
                "📤 Tuma Sauti",
                variant="primary",
                size="lg"
            )

            # Start a fresh conversation.
            reset_btn = gr.Button(
                UI_STRINGS["reset_button"],
                variant="secondary"
            )

        with gr.Column(scale=1):
            # Running transcript, seeded with the welcome message.
            conversation_display = gr.Markdown(
                value=f"**{UI_STRINGS['assistant_label']}** {UI_STRINGS['welcome']}\n",
                label="Mazungumzo"
            )

            # Spoken reply.
            audio_output = gr.Audio(
                label="🔊 Jibu la Sauti",
                type="filepath",
                interactive=False
            )

    # Every handler fills the same four components, in this order.
    handler_outputs = [status_display, conversation_display, audio_output, audio_input]

    def _on_audio_change(audio):
        """Process a new recording; ignore the event when the recorder is empty."""
        if audio:
            return process_voice_input(audio)
        # The handlers empty the recorder when they finish, which fires
        # `change` again with no audio.  Answer with no-op updates so the
        # conversation and reply that were just rendered stay on screen.
        return gr.update(), gr.update(), gr.update(), gr.update()

    process_btn.click(
        fn=process_voice_input,
        inputs=[audio_input],
        outputs=handler_outputs
    )

    reset_btn.click(
        fn=reset_conversation,
        outputs=handler_outputs
    )

    # Auto-submit as soon as a recording is available.
    audio_input.change(
        fn=_on_audio_change,
        inputs=[audio_input],
        outputs=handler_outputs
    )
| |
|
if __name__ == "__main__":
    # Hugging Face sets SPACE_ID inside a Space, where the app is already
    # publicly hosted and share links are not supported.  Only ask for a
    # share link when running elsewhere.
    on_spaces = bool(os.environ.get("SPACE_ID"))
    demo.launch(
        # Listen on all interfaces so the container's port mapping can reach
        # the server.
        server_name="0.0.0.0",
        server_port=7860,
        share=not on_spaces
    )
| |
|