Spaces:

ArtificialCoincidence
/

whisper_chinese

Running

App Files Files Community

ArtificialCoincidence committed on Dec 8, 2023

Commit

87ec7b6

1 Parent(s): b1c94db

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -4

app.py CHANGED Viewed

@@ -1,5 +1,9 @@
 from transformers import pipeline
 import gradio as gr
 pipe = pipeline(model="ArtificialCoincidence/check_points")  # change to "your-username/the-name-you-picked"
@@ -7,12 +11,42 @@ def transcribe(audio):
     text = pipe(audio)["text"]
     return text
-iface = gr.Interface(
     fn=transcribe,
-    inputs=gr.inputs.Audio(source="microphone", type="filepath"),
     outputs="text",
-    title="Whisper Small Chinese",
     description="Realtime demo for Chinese speech recognition using a fine-tuned Whisper small model.",
 )
-iface.launch()

 from transformers import pipeline
 import gradio as gr
+import os
+import pytube as pt
+youtube_file_path = "youtube_audio"
 pipe = pipeline(model="ArtificialCoincidence/check_points")  # change to "your-username/the-name-you-picked"
     text = pipe(audio)["text"]
     return text
+def _return_yt_html_embed(yt_url):
+    """Return an HTML snippet that embeds the YouTube video at ``yt_url``.
+
+    The video id is taken as everything after the last "?v=" in the URL and
+    is interpolated into a centered 500x320 ``<iframe>`` pointing at the
+    ``youtube.com/embed/`` player.
+
+    NOTE(review): a URL that does not contain "?v=" yields the whole URL as
+    the id, and anything following the id (e.g. "&t=10s") stays attached to
+    it -- confirm whether such inputs need handling.
+    """
+    video_id = yt_url.split("?v=")[-1]
+    HTML_str = (
+        f'<center> <iframe width="500" height="320" src="https://www.youtube.com/embed/{video_id}"> </iframe>'
+        " </center>"
+    )
+    return HTML_str
+def transcribe_video(yt_url):
+    """Download the audio of the YouTube video at ``yt_url`` and transcribe it.
+
+    The audio is fetched with pytube, saved under the module-level
+    ``youtube_file_path`` name, and that path is handed to ``transcribe``.
+    Returns the transcription text produced by ``transcribe``.
+    """
+    yt = pt.YouTube(yt_url)
+    # NOTE(review): html_embed_str is built here but never used or returned.
+    html_embed_str = _return_yt_html_embed(yt_url)
+    # Take the first entry of the audio-only stream list.
+    stream = yt.streams.filter(only_audio=True)[0]
+    # The download always targets the same fixed filename.
+    # NOTE(review): presumably overlapping requests would overwrite each
+    # other's file -- confirm whether that matters for this demo.
+    stream.download(filename=youtube_file_path)
+    text = transcribe(youtube_file_path)
+    return text
+# Outer container; the tabbed UI is created inside its context further down.
+iface = gr.Blocks()
+# Interface 1: microphone audio (requested as a file path) -> transcribe -> text.
+microphone_trans = gr.Interface(
     fn=transcribe,
+    inputs=gr.Audio(source="microphone", type="filepath", optional=True),
+    outputs="text",
+    title="Whisper Chinese",
+    description="Realtime demo for Chinese speech recognition using a fine-tuned Whisper small model.",
+)
+# Interface 2: video URL typed into a one-line textbox -> transcribe_video -> text.
+video_trans = gr.Interface(
+    fn=transcribe_video,
+    inputs=gr.Textbox(lines=1, placeholder="Paste the URL to a video here", label="video url"),
     outputs="text",
+    title="Whisper Chinese",
     description="Realtime demo for Chinese speech recognition using a fine-tuned Whisper small model.",
 )
+# Put both interfaces into one tabbed layout; the labels follow the list order.
+with iface:
+    gr.TabbedInterface([microphone_trans, video_trans], ["Transcribe Microphone", "Transcribe Video"])
+# Start the app with Gradio's debug flag enabled.
+iface.launch(debug=True)