Beijuka committed on
Commit 825ba7c · verified · 1 Parent(s): 182329f

Upload folder using huggingface_hub

Files changed (5):
  1. .gitattributes +2 -0
  2. app.py +42 -18
  3. requirements.txt +4 -3
  4. samples/00001.wav +3 -0
  5. samples/00002.wav +3 -0
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+samples/00001.wav filter=lfs diff=lfs merge=lfs -text
+samples/00002.wav filter=lfs diff=lfs merge=lfs -text
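
Both WAVs are routed through Git LFS by the two new rules above. A minimal sketch of how the same rules could be appended to .gitattributes from Python, for anyone reproducing the setup without `git lfs track`; the rule strings are copied from this diff, while the helper name and script are illustrative and not part of the commit.

# Sketch: idempotently append the two LFS rules from this commit to .gitattributes.
# Rule strings come from the diff above; the helper itself is illustrative.
from pathlib import Path

LFS_RULES = [
    "samples/00001.wav filter=lfs diff=lfs merge=lfs -text",
    "samples/00002.wav filter=lfs diff=lfs merge=lfs -text",
]

def ensure_lfs_rules(attributes_path: str = ".gitattributes") -> None:
    path = Path(attributes_path)
    text = path.read_text() if path.exists() else ""
    missing = [rule for rule in LFS_RULES if rule not in text.splitlines()]
    if missing:
        if text and not text.endswith("\n"):
            text += "\n"
        path.write_text(text + "\n".join(missing) + "\n")

ensure_lfs_rules()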
app.py CHANGED
@@ -2,40 +2,64 @@ import torch
 import gradio as gr
 from model import ECAPA_gender
 
+
+SAMPLE_AUDIO = [
+    ("Sample 1", "samples/00001.wav"),
+    ("Sample 2", "samples/00002.wav"),
+]
+
 model = ECAPA_gender.from_pretrained("Beijuka/voice-gender-classifier")
 model.eval()
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model.to(device)
 
+
 def predict_gender_confidence(audio_file):
     if audio_file is None:
         return "No audio provided"
 
     try:
-        # Load audio
-        audio = model.load_audio(audio_file.name if hasattr(audio_file, "name") else audio_file)
+        path = audio_file if isinstance(audio_file, str) else getattr(audio_file, "name", None)
+        if not path:
+            return "No audio path provided"
+
+        audio = model.load_audio(path)
         audio = audio.to(device)
 
-        # Forward pass
         with torch.no_grad():
             logits = model.forward(audio)
         probs = torch.softmax(logits, dim=1).cpu().numpy()[0]
         pred_idx = logits.argmax(dim=1).item()
         gender_pred = model.pred2gender[pred_idx].capitalize()
         confidence = probs[pred_idx] * 100
 
         return f"{gender_pred} — {confidence:.1f}% confidence"
 
     except Exception as e:
         return f"Error: {e}"
 
-iface = gr.Interface(
-    fn=predict_gender_confidence,
-    inputs=gr.Audio(type="filepath", label="Upload audio file", sources=["upload"]),
-    outputs=gr.Textbox(label="Predicted Gender with Confidence"),
-    title="Voice Gender Classifier",
-    description="Upload an audio file and the model predicts speaker gender with confidence.",
-    allow_flagging="never"
-)
-
-iface.launch(share=True)
+
+with gr.Blocks(title="Voice Gender Classifier") as demo:
+    gr.Markdown("""
+    ## Voice Gender Classifier
+    Upload or record a short audio clip to predict speaker gender. Try the built-in samples if you need test audio.
+    """)
+
+    audio_input = gr.Audio(
+        sources=["upload", "microphone"],
+        type="filepath",
+        label="Upload or record audio",
+    )
+    prediction = gr.Textbox(label="Prediction", interactive=False)
+
+    gr.Examples(
+        examples=[path for _, path in SAMPLE_AUDIO],
+        inputs=audio_input,
+        outputs=prediction,
+        fn=predict_gender_confidence,
+        label="Try sample audios",
+    )
+
+    audio_input.change(fn=predict_gender_confidence, inputs=audio_input, outputs=prediction)
+
+demo.launch(share=True)
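
For a quick check outside the Gradio UI, the same prediction path can be run directly against the two bundled samples. A minimal sketch assuming this repo's model.py and the samples/ folder added in this commit; the standalone script itself is not part of the commit.

# Sketch: exercise the same prediction path as app.py on the bundled samples,
# without launching Gradio. Uses only names app.py itself relies on
# (ECAPA_gender, load_audio, pred2gender); the script is illustrative.
import torch
from model import ECAPA_gender

model = ECAPA_gender.from_pretrained("Beijuka/voice-gender-classifier")
model.eval()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

for path in ["samples/00001.wav", "samples/00002.wav"]:
    audio = model.load_audio(path).to(device)
    with torch.no_grad():
        logits = model.forward(audio)
    probs = torch.softmax(logits, dim=1).cpu().numpy()[0]
    idx = logits.argmax(dim=1).item()
    print(f"{path}: {model.pred2gender[idx]} ({probs[idx] * 100:.1f}%)")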
requirements.txt CHANGED
@@ -1,7 +1,8 @@
 torch
 torchaudio
 pysoundfile
-gradio==4.44.1
-huggingface_hub==0.24.5
+huggingface_hub>=0.23.0
 safetensors
-fastapi<0.113.0
+gradio==4.31.1
+gradio-client==0.16.3
+fastapi>=0.110,<0.116
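
Since the commit swaps the Gradio pin and loosens the huggingface_hub and fastapi bounds, it can help to confirm what an environment actually resolved to. A small sketch using importlib.metadata; the package names come from the new requirements.txt, and the check script is illustrative only.

# Sketch: print installed versions of the packages pinned or bounded above.
# "gradio_client" is the distribution name installed by `pip install gradio-client`.
from importlib.metadata import PackageNotFoundError, version

for pkg in ["gradio", "gradio_client", "huggingface_hub", "fastapi", "torch", "torchaudio"]:
    try:
        print(f"{pkg}=={version(pkg)}")
    except PackageNotFoundError:
        print(f"{pkg}: not installed")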
samples/00001.wav ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c71446ec2322ae5aa480a5e8865dfdc3535b4bac238dd87e3cb7edd06dab89fd
+size 267566
samples/00002.wav ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ecdf81edc906e8941c6dfbad12b8215b2adb9f1ace079e97014073687764e63c
+size 238126
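
Both sample files are committed as Git LFS pointers, so only the version/oid/size stanzas above live in the repo; the audio bytes are fetched by LFS on checkout. A minimal sketch for parsing such a pointer and checking a resolved copy against it; the field names are taken from the pointers above, and the helper is illustrative.

# Sketch: parse a Git LFS pointer (version / oid / size, as in the stanzas above)
# and verify that a resolved local file matches it. Illustrative helper only.
import hashlib
from pathlib import Path

def read_pointer(pointer_text: str) -> dict:
    fields = {}
    for line in pointer_text.splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields  # e.g. {"version": ..., "oid": "sha256:...", "size": "238126"}

def matches_pointer(file_path: str, fields: dict) -> bool:
    data = Path(file_path).read_bytes()
    digest = "sha256:" + hashlib.sha256(data).hexdigest()
    return digest == fields["oid"] and len(data) == int(fields["size"])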