import gradio as gr
from transformers import pipeline
import numpy as np

# Whisper-base ASR pipeline, loaded once at module import (model download on
# first run).
transcription = pipeline(
    "automatic-speech-recognition", model="openai/whisper-base"
)


def asrtranscription(audio):
    """Transcribe a recorded audio clip to text with Whisper.

    Parameters
    ----------
    audio : tuple[int, np.ndarray]
        ``(sample_rate, samples)`` as delivered by the ``gr.Audio``
        microphone input.

    Returns
    -------
    str
        The recognized text.
    """
    sr, y = audio
    # Whisper expects float32 samples normalized to [-1, 1].
    y = y.astype(np.float32)
    # Guard the peak normalization: an empty buffer would make np.max raise,
    # and a silent (all-zero) recording would divide by zero and fill the
    # array with NaNs.
    peak = np.max(np.abs(y)) if y.size else 0.0
    if peak > 0:
        y /= peak
    return transcription({'sampling_rate': sr, 'raw': y})['text']


demo = gr.Interface(
    fn=asrtranscription,
    inputs=gr.Audio(sources=['microphone']),
    outputs='text',
)

demo.launch()