ArtificialCoincidence committed on
Commit
87ec7b6
·
1 Parent(s): b1c94db

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -4
app.py CHANGED
@@ -1,5 +1,9 @@
1
  from transformers import pipeline
2
  import gradio as gr
 
 
 
 
3
 
4
  pipe = pipeline(model="ArtificialCoincidence/check_points") # change to "your-username/the-name-you-picked"
5
 
@@ -7,12 +11,42 @@ def transcribe(audio):
7
  text = pipe(audio)["text"]
8
  return text
9
 
10
- iface = gr.Interface(
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  fn=transcribe,
12
- inputs=gr.inputs.Audio(source="microphone", type="filepath"),
 
 
 
 
 
 
 
 
13
  outputs="text",
14
- title="Whisper Small Chinese",
15
  description="Realtime demo for Chinese speech recognition using a fine-tuned Whisper small model.",
16
  )
17
 
18
- iface.launch()
 
 
 
1
  from transformers import pipeline
2
  import gradio as gr
3
+ import os
4
+ import pytube as pt
5
+
6
+ youtube_file_path = "youtube_audio"
7
 
8
  pipe = pipeline(model="ArtificialCoincidence/check_points") # change to "your-username/the-name-you-picked"
9
 
 
11
  text = pipe(audio)["text"]
12
  return text
13
 
14
+ def _return_yt_html_embed(yt_url):
15
+ video_id = yt_url.split("?v=")[-1]
16
+ HTML_str = (
17
+ f'<center> <iframe width="500" height="320" src="https://www.youtube.com/embed/{video_id}"> </iframe>'
18
+ " </center>"
19
+ )
20
+ return HTML_str
21
+
22
def transcribe_video(yt_url):
    """Download the audio track of a YouTube video and transcribe it.

    The audio is saved into a per-call temporary directory (using the
    module-level ``youtube_file_path`` as the file name) so that concurrent
    requests cannot overwrite each other's download, and the file is removed
    once transcription has finished.

    Args:
        yt_url: URL of the YouTube video.

    Returns:
        The text returned by ``transcribe`` for the downloaded audio file.

    Raises:
        ValueError: If ``yt_url`` is empty/blank, or the video exposes no
            audio-only stream.
    """
    import tempfile

    if not yt_url or not yt_url.strip():
        raise ValueError("Please provide a YouTube video URL.")

    yt = pt.YouTube(yt_url.strip())

    # ``first()`` yields None when nothing matches, which gives a clear error
    # below instead of an IndexError from ``[0]``.
    stream = yt.streams.filter(only_audio=True).first()
    if stream is None:
        raise ValueError("No audio-only stream is available for this video.")

    with tempfile.TemporaryDirectory() as download_dir:
        # Use the path reported by pytube rather than assuming the final name.
        audio_path = stream.download(
            output_path=download_dir, filename=youtube_file_path
        )
        return transcribe(audio_path)
31
+
32
# Outer container that hosts the tabbed UI.
iface = gr.Blocks()

# Presentation settings that are identical for both tabs.
_shared_ui = dict(
    outputs="text",
    title="Whisper Chinese",
    description="Realtime demo for Chinese speech recognition using a fine-tuned Whisper small model.",
)

# Tab 1: record from the microphone; the recording is passed to
# ``transcribe`` as a file path.
_mic_input = gr.Audio(source="microphone", type="filepath", optional=True)
microphone_trans = gr.Interface(fn=transcribe, inputs=_mic_input, **_shared_ui)

# Tab 2: paste a video URL, which is handed to ``transcribe_video``.
_url_input = gr.Textbox(
    lines=1,
    placeholder="Paste the URL to a video here",
    label="video url",
)
video_trans = gr.Interface(fn=transcribe_video, inputs=_url_input, **_shared_ui)

# Render both interfaces as tabs inside the outer Blocks container.
with iface:
    gr.TabbedInterface(
        [microphone_trans, video_trans],
        ["Transcribe Microphone", "Transcribe Video"],
    )

iface.launch(debug=True)