Spaces:
Sleeping
Sleeping
HKAB
committed on
Commit
·
5ec340b
1
Parent(s):
dd0d853
update
Browse files
- __pycache__/examples.cpython-310.pyc +0 -0
- app.py +6 -3
- examples.py +6 -5
__pycache__/examples.cpython-310.pyc
CHANGED
|
Binary files a/__pycache__/examples.cpython-310.pyc and b/__pycache__/examples.cpython-310.pyc differ
|
|
|
app.py
CHANGED
|
@@ -105,8 +105,9 @@ title = "# Streaming RNN-T with Whisper Encoder"
|
|
| 105 |
description = """
|
| 106 |
Visit <https://github.com/HKAB/rnnt-whisper-tutorial/> for more information.
|
| 107 |
|
| 108 |
-
- This model
|
| 109 |
-
- This model
|
|
|
|
| 110 |
"""
|
| 111 |
|
| 112 |
def onnx_online_inference(audio, ort_encoder_session, ort_decoder_session, ort_jointer_session, tokenizer):
|
|
@@ -242,7 +243,7 @@ def process(
|
|
| 242 |
with demo:
|
| 243 |
gr.Markdown(title)
|
| 244 |
gr.Markdown(description)
|
| 245 |
-
model_type = gr.Radio(["FP32", "INT8
|
| 246 |
|
| 247 |
with gr.Tabs():
|
| 248 |
with gr.TabItem("Upload from disk"):
|
|
@@ -261,6 +262,7 @@ with demo:
|
|
| 261 |
uploaded_file,
|
| 262 |
model_type
|
| 263 |
],
|
|
|
|
| 264 |
outputs=[uploaded_output, uploaded_html_info],
|
| 265 |
fn=process_uploaded_file,
|
| 266 |
label="Cherry-picked examples",
|
|
@@ -283,6 +285,7 @@ with demo:
|
|
| 283 |
microphone,
|
| 284 |
model_type
|
| 285 |
],
|
|
|
|
| 286 |
outputs=[recorded_output, recorded_html_info],
|
| 287 |
fn=process_microphone,
|
| 288 |
label="Cherry-picked examples",
|
|
|
|
| 105 |
description = """
|
| 106 |
Visit <https://github.com/HKAB/rnnt-whisper-tutorial/> for more information.
|
| 107 |
|
| 108 |
+
- This model runs on CPU (Free tier) so the RTF of FP32 model is around 1.5.
|
| 109 |
+
- This model mights not work with your microphone since it was trained on a quite clean dataset. Try to speak loudly and clearly 😃
|
| 110 |
+
- Although you upload a full audio file, the model will process it in a streaming fashion.
|
| 111 |
"""
|
| 112 |
|
| 113 |
def onnx_online_inference(audio, ort_encoder_session, ort_decoder_session, ort_jointer_session, tokenizer):
|
|
|
|
| 243 |
with demo:
|
| 244 |
gr.Markdown(title)
|
| 245 |
gr.Markdown(description)
|
| 246 |
+
model_type = gr.Radio(["FP32", "INT8"], label="Model type", value="FP32", info="INT8 model is faster but less accurate")
|
| 247 |
|
| 248 |
with gr.Tabs():
|
| 249 |
with gr.TabItem("Upload from disk"):
|
|
|
|
| 262 |
uploaded_file,
|
| 263 |
model_type
|
| 264 |
],
|
| 265 |
+
cache_mode="lazy",
|
| 266 |
outputs=[uploaded_output, uploaded_html_info],
|
| 267 |
fn=process_uploaded_file,
|
| 268 |
label="Cherry-picked examples",
|
|
|
|
| 285 |
microphone,
|
| 286 |
model_type
|
| 287 |
],
|
| 288 |
+
cache_mode="lazy",
|
| 289 |
outputs=[recorded_output, recorded_html_info],
|
| 290 |
fn=process_microphone,
|
| 291 |
label="Cherry-picked examples",
|
examples.py
CHANGED
|
@@ -1,17 +1,18 @@
|
|
| 1 |
examples = [
|
| 2 |
[
|
| 3 |
-
"./test_wavs/Hue_short.wav"
|
|
|
|
| 4 |
],
|
| 5 |
[
|
| 6 |
-
"./test_wavs/12345_short.wav"
|
|
|
|
| 7 |
],
|
| 8 |
[
|
| 9 |
"./test_wavs/Trump_long.mp3",
|
|
|
|
| 10 |
],
|
| 11 |
[
|
| 12 |
"./test_wavs/Ucraina_moderate.mp3",
|
| 13 |
-
|
| 14 |
-
[
|
| 15 |
-
"./test_wavs/Duongsat_short.m4a",
|
| 16 |
]
|
| 17 |
]
|
|
|
|
| 1 |
examples = [
|
| 2 |
[
|
| 3 |
+
"./test_wavs/Hue_short.wav",
|
| 4 |
+
"FP32",
|
| 5 |
],
|
| 6 |
[
|
| 7 |
+
"./test_wavs/12345_short.wav",
|
| 8 |
+
"FP32",
|
| 9 |
],
|
| 10 |
[
|
| 11 |
"./test_wavs/Trump_long.mp3",
|
| 12 |
+
"FP32",
|
| 13 |
],
|
| 14 |
[
|
| 15 |
"./test_wavs/Ucraina_moderate.mp3",
|
| 16 |
+
"FP32",
|
|
|
|
|
|
|
| 17 |
]
|
| 18 |
]
|