Spaces:
Sleeping
Sleeping
HKAB
committed on
Commit
·
dd0d853
1
Parent(s):
486b001
add quant model
Browse files
__pycache__/examples.cpython-310.pyc
ADDED
|
Binary file (334 Bytes). View file
|
|
|
app.py
CHANGED
|
@@ -38,6 +38,10 @@ ort_encoder_session = ort.InferenceSession("./onnx/encoder_160_8.onnx")
|
|
| 38 |
ort_decoder_session = ort.InferenceSession("./onnx/decoder_160_8.onnx")
|
| 39 |
ort_jointer_session = ort.InferenceSession("./onnx/jointer_160_8.onnx")
|
| 40 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
demo = gr.Blocks()
|
| 42 |
|
| 43 |
def build_html_output(s: str, style: str = "result_item_success"):
|
|
@@ -55,7 +59,8 @@ def MyPrint(s):
|
|
| 55 |
print(f"{date_time}: {s}")
|
| 56 |
|
| 57 |
def process_microphone(
|
| 58 |
-
in_filename: str
|
|
|
|
| 59 |
):
|
| 60 |
if in_filename is None or in_filename == "":
|
| 61 |
return "", build_html_output(
|
|
@@ -69,6 +74,7 @@ def process_microphone(
|
|
| 69 |
try:
|
| 70 |
return process(
|
| 71 |
in_filename=in_filename,
|
|
|
|
| 72 |
)
|
| 73 |
except Exception as e:
|
| 74 |
MyPrint(str(e))
|
|
@@ -76,6 +82,7 @@ def process_microphone(
|
|
| 76 |
|
| 77 |
def process_uploaded_file(
|
| 78 |
in_filename: str,
|
|
|
|
| 79 |
):
|
| 80 |
if in_filename is None or in_filename == "":
|
| 81 |
return "", build_html_output(
|
|
@@ -87,7 +94,8 @@ def process_uploaded_file(
|
|
| 87 |
MyPrint(f"Processing uploaded file: {in_filename}")
|
| 88 |
try:
|
| 89 |
return process(
|
| 90 |
-
in_filename=in_filename
|
|
|
|
| 91 |
)
|
| 92 |
except Exception as e:
|
| 93 |
MyPrint(str(e))
|
|
@@ -194,9 +202,8 @@ def onnx_online_inference(audio, ort_encoder_session, ort_decoder_session, ort_j
|
|
| 194 |
|
| 195 |
def process(
|
| 196 |
in_filename: str,
|
|
|
|
| 197 |
):
|
| 198 |
-
# filename = convert_to_wav(in_filename)
|
| 199 |
-
|
| 200 |
now = datetime.now()
|
| 201 |
date_time = now.strftime("%Y-%m-%d %H:%M:%S.%f")
|
| 202 |
MyPrint(f"Started at {date_time}")
|
|
@@ -208,7 +215,12 @@ def process(
|
|
| 208 |
duration = len(audio) / SAMPLE_RATE
|
| 209 |
|
| 210 |
audio = np.expand_dims(audio, 0).astype(np.float32)
|
| 211 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 212 |
|
| 213 |
date_time = now.strftime("%Y-%m-%d %H:%M:%S.%f")
|
| 214 |
end = time.time()
|
|
@@ -230,6 +242,7 @@ def process(
|
|
| 230 |
with demo:
|
| 231 |
gr.Markdown(title)
|
| 232 |
gr.Markdown(description)
|
|
|
|
| 233 |
|
| 234 |
with gr.Tabs():
|
| 235 |
with gr.TabItem("Upload from disk"):
|
|
@@ -245,7 +258,8 @@ with demo:
|
|
| 245 |
gr.Examples(
|
| 246 |
examples=examples,
|
| 247 |
inputs=[
|
| 248 |
-
uploaded_file
|
|
|
|
| 249 |
],
|
| 250 |
outputs=[uploaded_output, uploaded_html_info],
|
| 251 |
fn=process_uploaded_file,
|
|
@@ -266,7 +280,8 @@ with demo:
|
|
| 266 |
gr.Examples(
|
| 267 |
examples=examples,
|
| 268 |
inputs=[
|
| 269 |
-
microphone
|
|
|
|
| 270 |
],
|
| 271 |
outputs=[recorded_output, recorded_html_info],
|
| 272 |
fn=process_microphone,
|
|
@@ -276,7 +291,8 @@ with demo:
|
|
| 276 |
upload_button.click(
|
| 277 |
process_uploaded_file,
|
| 278 |
inputs=[
|
| 279 |
-
uploaded_file
|
|
|
|
| 280 |
],
|
| 281 |
outputs=[uploaded_output, uploaded_html_info],
|
| 282 |
)
|
|
@@ -285,6 +301,7 @@ with demo:
|
|
| 285 |
process_microphone,
|
| 286 |
inputs=[
|
| 287 |
microphone,
|
|
|
|
| 288 |
],
|
| 289 |
outputs=[recorded_output, recorded_html_info],
|
| 290 |
)
|
|
|
|
| 38 |
ort_decoder_session = ort.InferenceSession("./onnx/decoder_160_8.onnx")
|
| 39 |
ort_jointer_session = ort.InferenceSession("./onnx/jointer_160_8.onnx")
|
| 40 |
|
| 41 |
+
ort_encoder_session_quant = ort.InferenceSession("./onnx/encoder_160_8-infer.quant.onnx")
|
| 42 |
+
ort_decoder_session_quant = ort.InferenceSession("./onnx/decoder_160_8-infer.quant.onnx")
|
| 43 |
+
ort_jointer_session_quant = ort.InferenceSession("./onnx/jointer_160_8-infer.quant.onnx")
|
| 44 |
+
|
| 45 |
demo = gr.Blocks()
|
| 46 |
|
| 47 |
def build_html_output(s: str, style: str = "result_item_success"):
|
|
|
|
| 59 |
print(f"{date_time}: {s}")
|
| 60 |
|
| 61 |
def process_microphone(
|
| 62 |
+
in_filename: str,
|
| 63 |
+
model_type: str
|
| 64 |
):
|
| 65 |
if in_filename is None or in_filename == "":
|
| 66 |
return "", build_html_output(
|
|
|
|
| 74 |
try:
|
| 75 |
return process(
|
| 76 |
in_filename=in_filename,
|
| 77 |
+
model_type=model_type
|
| 78 |
)
|
| 79 |
except Exception as e:
|
| 80 |
MyPrint(str(e))
|
|
|
|
| 82 |
|
| 83 |
def process_uploaded_file(
|
| 84 |
in_filename: str,
|
| 85 |
+
model_type: str
|
| 86 |
):
|
| 87 |
if in_filename is None or in_filename == "":
|
| 88 |
return "", build_html_output(
|
|
|
|
| 94 |
MyPrint(f"Processing uploaded file: {in_filename}")
|
| 95 |
try:
|
| 96 |
return process(
|
| 97 |
+
in_filename=in_filename,
|
| 98 |
+
model_type=model_type
|
| 99 |
)
|
| 100 |
except Exception as e:
|
| 101 |
MyPrint(str(e))
|
|
|
|
| 202 |
|
| 203 |
def process(
|
| 204 |
in_filename: str,
|
| 205 |
+
model_type: str
|
| 206 |
):
|
|
|
|
|
|
|
| 207 |
now = datetime.now()
|
| 208 |
date_time = now.strftime("%Y-%m-%d %H:%M:%S.%f")
|
| 209 |
MyPrint(f"Started at {date_time}")
|
|
|
|
| 215 |
duration = len(audio) / SAMPLE_RATE
|
| 216 |
|
| 217 |
audio = np.expand_dims(audio, 0).astype(np.float32)
|
| 218 |
+
if model_type == "FP32":
|
| 219 |
+
MyPrint("Using FP32 model")
|
| 220 |
+
text = onnx_online_inference(audio, ort_encoder_session, ort_decoder_session, ort_jointer_session, tokenizer)
|
| 221 |
+
else:
|
| 222 |
+
MyPrint("Using INT8 model")
|
| 223 |
+
text = onnx_online_inference(audio, ort_encoder_session_quant, ort_decoder_session_quant, ort_jointer_session_quant, tokenizer)
|
| 224 |
|
| 225 |
date_time = now.strftime("%Y-%m-%d %H:%M:%S.%f")
|
| 226 |
end = time.time()
|
|
|
|
| 242 |
with demo:
|
| 243 |
gr.Markdown(title)
|
| 244 |
gr.Markdown(description)
|
| 245 |
+
model_type = gr.Radio(["FP32", "INT8 (Quantized)"], label="Model type", value="FP32", info="INT8 model is faster but less accurate")
|
| 246 |
|
| 247 |
with gr.Tabs():
|
| 248 |
with gr.TabItem("Upload from disk"):
|
|
|
|
| 258 |
gr.Examples(
|
| 259 |
examples=examples,
|
| 260 |
inputs=[
|
| 261 |
+
uploaded_file,
|
| 262 |
+
model_type
|
| 263 |
],
|
| 264 |
outputs=[uploaded_output, uploaded_html_info],
|
| 265 |
fn=process_uploaded_file,
|
|
|
|
| 280 |
gr.Examples(
|
| 281 |
examples=examples,
|
| 282 |
inputs=[
|
| 283 |
+
microphone,
|
| 284 |
+
model_type
|
| 285 |
],
|
| 286 |
outputs=[recorded_output, recorded_html_info],
|
| 287 |
fn=process_microphone,
|
|
|
|
| 291 |
upload_button.click(
|
| 292 |
process_uploaded_file,
|
| 293 |
inputs=[
|
| 294 |
+
uploaded_file,
|
| 295 |
+
model_type
|
| 296 |
],
|
| 297 |
outputs=[uploaded_output, uploaded_html_info],
|
| 298 |
)
|
|
|
|
| 301 |
process_microphone,
|
| 302 |
inputs=[
|
| 303 |
microphone,
|
| 304 |
+
model_type
|
| 305 |
],
|
| 306 |
outputs=[recorded_output, recorded_html_info],
|
| 307 |
)
|
onnx/decoder_160_8-infer.quant.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:978b787f710a2be2598360bc77e181c0cb0ec004555716b90041b9e8c43a06c3
|
| 3 |
+
size 17324565
|
onnx/encoder_160_8-infer.quant.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:95ceeac88d898e1d4d275a185ba891580604bcfd44c0b3611530e8613c23b8f4
|
| 3 |
+
size 101468916
|
onnx/jointer_160_8-infer.quant.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f68c8210d14abce4ca065ed8bf6d0141666c7f66a74bd67cf9c63aef4c989ec6
|
| 3 |
+
size 793884
|