Ronaldodev committed (verified)
Commit bb2e6f4 · Parent: c6ed426

Update app.py

Files changed (1):
  1. app.py (+13, -10)
app.py CHANGED

@@ -37,31 +37,34 @@ def transcribe(audio):
 
     return processor.decode(ids, skip_special_tokens=True)
 
-def transcribe_long(audio, chunk_seconds=30):
+def transcribe_long(audio, chunk_seconds=30, overlap_seconds=5):
     if audio is None:
         return "Aucun audio fourni."
 
-    # 1. Load audio
     speech, sr = librosa.load(audio, sr=16000)
-
-    # 2. Découpage en chunks de 30s
     chunk_size = chunk_seconds * sr
-    chunks = [speech[i:i + chunk_size] for i in range(0, len(speech), chunk_size)]
-
+    overlap = overlap_seconds * sr
+    start = 0
     full_text = ""
-    for idx, chunk in enumerate(chunks):
+
+    while start < len(speech):
+        end = min(start + chunk_size, len(speech))
+        chunk = speech[start:end]
+
         inputs = processor(chunk, sampling_rate=16000, return_tensors="pt").input_features.to(device)
 
         with torch.no_grad():
-            ids = model.generate(inputs, max_length=300)[0]
+            ids = model.generate(inputs, max_length=448)[0]
 
         text = processor.decode(ids, skip_special_tokens=True)
-        full_text += f"{text} "
+        full_text += text + " "
 
-        print(f"[Chunk {idx+1}/{len(chunks)}] ✔")  # suivi en live dans console
+        start += chunk_size - overlap
+        print(f"Chunk {start//(chunk_size - overlap)} / {len(speech)//(chunk_size - overlap)}")
 
     return full_text.strip()
 
+
 def transcribe_music(audio, segment_length=15):  # 15 sec idéal
     y, sr = librosa.load(audio, sr=16000)
 
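
What the change does: the previous version cut the signal into back-to-back 30 s chunks, so a word sitting on a chunk boundary could be split and lost; the new version advances the window by chunk_seconds - overlap_seconds, so every boundary is re-covered by the start of the next window. Below is a minimal standalone sketch of just that window arithmetic, assuming a hypothetical 72-second recording at 16 kHz (a zero-filled NumPy array stands in for real audio; no processor or model is involved):

import numpy as np

# Hypothetical input: 72 seconds of silence at 16 kHz, standing in for real audio.
sr = 16000
speech = np.zeros(72 * sr, dtype=np.float32)

chunk_seconds, overlap_seconds = 30, 5  # same defaults as the committed transcribe_long
chunk_size = chunk_seconds * sr
overlap = overlap_seconds * sr

start = 0
while start < len(speech):
    end = min(start + chunk_size, len(speech))
    # Each window re-covers the last overlap_seconds of the previous one.
    print(f"window {start // sr:3d}s to {end // sr:3d}s")
    start += chunk_size - overlap
# Prints: 0s to 30s, 25s to 55s, 50s to 72s; the 30 s and 55 s boundaries are read twice.

One trade-off of this design: since the overlapping seconds are transcribed twice and the committed code stitches results by plain concatenation (full_text += text + " "), words inside the overlap can appear twice in the output; the commit accepts that in exchange for not dropping words at chunk boundaries.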