Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import requests | |
| import os | |
# Endpoint of the Hugging Face Inference API model used for transcription.
API_URL = "https://api-inference.huggingface.co/models/jonatasgrosman/wav2vec2-large-xlsr-53-arabic"

# The API token is read from the HF_API_TOKEN environment variable
# (configured through Hugging Face Secrets); it is None when unset.
hf_api_token = os.getenv("HF_API_TOKEN")

# Only attach an Authorization header when a token is actually configured;
# otherwise the header would literally read "Bearer None".
headers = {"Authorization": f"Bearer {hf_api_token}"} if hf_api_token else {}
def query(filename, timeout=60):
    """
    Queries the Hugging Face API to transcribe audio from a file.

    The file is read fully into memory and sent as the raw request body.

    Args:
        filename (str): Path to the audio file.
        timeout (float): Seconds to wait for the API before giving up.
            Without a timeout, requests would wait indefinitely on a
            stalled connection.

    Returns:
        The decoded JSON body of the API response.

    Raises:
        OSError: If the audio file cannot be opened or read.
        requests.exceptions.RequestException: If the request fails or
            times out.
        ValueError: If the response body is not valid JSON.
    """
    with open(filename, "rb") as f:
        data = f.read()
    # The file is closed before the (potentially slow) network call.
    response = requests.post(API_URL, headers=headers, data=data, timeout=timeout)
    return response.json()
def transcribe_audio_hf(audio_path):
    """
    Transcribes an audio file through the Hugging Face Inference API.

    Args:
        audio_path (str): Path to the audio file.

    Returns:
        str: The value of the response's 'text' field with surrounding
        whitespace removed, or an empty string when that field is absent.
    """
    # A response without a 'text' key falls back to an empty transcription.
    return query(audio_path).get('text', '').strip()
def levenshtein_similarity(transcription1, transcription2):
    """
    Calculate the Levenshtein similarity between two transcriptions.

    The edit distance is divided by the length of the longer string and
    subtracted from 1.

    Args:
        transcription1 (str): The first transcription.
        transcription2 (str): The second transcription.

    Returns:
        float: A normalized similarity score between 0 and 1, where 1
        indicates identical transcriptions. Two empty transcriptions are
        identical and score 1.0.
    """
    max_len = max(len(transcription1), len(transcription2))
    if max_len == 0:
        # Both strings are empty: return early instead of dividing by zero.
        return 1.0

    import Levenshtein

    distance = Levenshtein.distance(transcription1, transcription2)
    return 1 - distance / max_len  # Normalize to get similarity score
def evaluate_audio_similarity(original_audio, user_audio):
    """
    Transcribes two audio files and scores how similar the transcriptions are.

    Args:
        original_audio (str): Path to the original audio file.
        user_audio (str): Path to the user's audio file.

    Returns:
        tuple: (original transcription, user transcription,
        Levenshtein similarity score).
    """
    # Transcribe the original first, then the user's recording.
    original_text = transcribe_audio_hf(original_audio)
    user_text = transcribe_audio_hf(user_audio)
    score = levenshtein_similarity(original_text, user_text)
    return original_text, user_text, score
def perform_testing(original_audio, user_audio):
    """
    Gradio callback: transcribes both recordings and formats the results.

    Args:
        original_audio (str | None): Path to the original audio file, or
            None when no audio was provided.
        user_audio (str | None): Path to the user's audio file, or None
            when no audio was provided.

    Returns:
        tuple: Three Markdown strings (original transcription, user
        transcription, similarity score). When either input is missing, the
        first string is a message asking for both files and the other two
        are empty, so there is always one value per output component.
    """
    if original_audio is None or user_audio is None:
        # Always return one value per output instead of falling through
        # and implicitly returning None.
        return (
            "**Error:** Please provide both the original and the user audio.",
            "",
            "",
        )

    transcription_original, transcription_user, similarity_score = evaluate_audio_similarity(
        original_audio, user_audio
    )
    return (
        f"**Original Transcription:** {transcription_original}",
        f"**User Transcription:** {transcription_user}",
        f"**Levenshtein Similarity Score:** {similarity_score:.2f}",
    )
# Gradio Interface
# NOTE(review): nesting below was reconstructed from a flattened paste —
# confirm the components and the click handler belong inside the "Upload" tab.
with gr.Blocks() as app:
    gr.Markdown("# Audio Transcription and Similarity Checker using Hugging Face Inference API")

    with gr.Tab("Upload"):
        # Both audio inputs are passed to the callback as file paths.
        original_audio_upload = gr.Audio(label="Upload Original Audio", type="filepath")
        user_audio_upload = gr.Audio(label="Upload User Audio", type="filepath")
        upload_button = gr.Button("Perform Testing")

        # One Markdown area per value returned by perform_testing.
        output_original_transcription = gr.Markdown()
        output_user_transcription = gr.Markdown()
        output_similarity_score = gr.Markdown()

        upload_button.click(
            fn=perform_testing,
            inputs=[original_audio_upload, user_audio_upload],
            outputs=[
                output_original_transcription,
                output_user_transcription,
                output_similarity_score,
            ],
        )

app.launch()