Update src/streamlit_app.py
Browse files- src/streamlit_app.py +24 -27
src/streamlit_app.py
CHANGED
|
@@ -1104,11 +1104,12 @@ if language_option == "Specify Language":
|
|
| 1104 |
)
|
| 1105 |
selected_language = language_codes[selected_language_name]
|
| 1106 |
|
| 1107 |
-
# Translation option
|
|
|
|
| 1108 |
translate_to_english = st.sidebar.checkbox(
|
| 1109 |
-
"Translate
|
| 1110 |
value=True,
|
| 1111 |
-
help="
|
| 1112 |
)
|
| 1113 |
|
| 1114 |
# Load models
|
|
@@ -1208,23 +1209,25 @@ if uploaded_file is not None:
|
|
| 1208 |
transcribe_options = {
|
| 1209 |
"verbose": False, # Set to True for debugging
|
| 1210 |
"fp16": False, # Use FP32 for CPU compatibility
|
| 1211 |
-
"condition_on_previous_text":
|
| 1212 |
"compression_ratio_threshold": 2.4, # Prevent early stopping
|
| 1213 |
"logprob_threshold": -1.0, # Lower threshold for better detection
|
| 1214 |
-
"no_speech_threshold": 0.3, #
|
| 1215 |
"temperature": 0.0, # Deterministic output
|
| 1216 |
"best_of": 1, # Use single best result
|
| 1217 |
-
"beam_size":
|
| 1218 |
}
|
| 1219 |
-
|
|
|
|
| 1220 |
if translate_to_english:
|
| 1221 |
-
#
|
| 1222 |
transcribe_options["task"] = "translate"
|
| 1223 |
-
#
|
| 1224 |
-
if selected_language
|
| 1225 |
transcribe_options["language"] = selected_language
|
|
|
|
| 1226 |
else:
|
| 1227 |
-
#
|
| 1228 |
if selected_language:
|
| 1229 |
transcribe_options["language"] = selected_language
|
| 1230 |
|
|
@@ -1237,11 +1240,6 @@ if uploaded_file is not None:
|
|
| 1237 |
# Verify we got full transcription
|
| 1238 |
total_segments = len(result.get("segments", []))
|
| 1239 |
transcription_text = result.get("text", "").strip()
|
| 1240 |
-
|
| 1241 |
-
# If main text is empty but we have segments, reconstruct from segments
|
| 1242 |
-
if not transcription_text and total_segments > 0:
|
| 1243 |
-
transcription_text = " ".join([seg.get("text", "").strip() for seg in result.get("segments", []) if seg.get("text", "").strip()])
|
| 1244 |
-
|
| 1245 |
transcription_length = len(transcription_text)
|
| 1246 |
|
| 1247 |
if total_segments == 0:
|
|
@@ -1376,16 +1374,8 @@ if uploaded_file is not None:
|
|
| 1376 |
# Display results
|
| 1377 |
# Main transcription text
|
| 1378 |
st.subheader("📝 Transcription")
|
| 1379 |
-
|
| 1380 |
-
# Extract transcription text - try multiple methods
|
| 1381 |
transcription_text = result.get("text", "").strip()
|
| 1382 |
|
| 1383 |
-
# If main text is empty but we have segments, reconstruct from segments
|
| 1384 |
-
if not transcription_text:
|
| 1385 |
-
segments = result.get("segments", [])
|
| 1386 |
-
if segments:
|
| 1387 |
-
transcription_text = " ".join([seg.get("text", "").strip() for seg in segments if seg.get("text", "").strip()])
|
| 1388 |
-
|
| 1389 |
# Show transcription statistics
|
| 1390 |
total_segments = len(result.get("segments", []))
|
| 1391 |
if total_segments > 0:
|
|
@@ -1788,10 +1778,17 @@ if uploaded_file is not None:
|
|
| 1788 |
"ur": "Urdu"
|
| 1789 |
}
|
| 1790 |
language_display = language_names.get(detected_language, detected_language.upper())
|
| 1791 |
-
|
| 1792 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1793 |
else:
|
| 1794 |
-
|
|
|
|
|
|
|
|
|
|
| 1795 |
|
| 1796 |
if aligned_segments:
|
| 1797 |
unique_speakers = set(seg["speaker"] for seg in aligned_segments)
|
|
|
|
| 1104 |
)
|
| 1105 |
selected_language = language_codes[selected_language_name]
|
| 1106 |
|
| 1107 |
+
# Translation option
|
| 1108 |
+
st.sidebar.markdown("---")
|
| 1109 |
translate_to_english = st.sidebar.checkbox(
|
| 1110 |
+
"🌍 Translate to English",
|
| 1111 |
value=True,
|
| 1112 |
+
help="Convert any language (Hindi, Spanish, etc.) to English text. Recommended for Hindi videos."
|
| 1113 |
)
|
| 1114 |
|
| 1115 |
# Load models
|
|
|
|
| 1209 |
transcribe_options = {
|
| 1210 |
"verbose": False, # Set to True for debugging
|
| 1211 |
"fp16": False, # Use FP32 for CPU compatibility
|
| 1212 |
+
"condition_on_previous_text": True, # Better context
|
| 1213 |
"compression_ratio_threshold": 2.4, # Prevent early stopping
|
| 1214 |
"logprob_threshold": -1.0, # Lower threshold for better detection
|
| 1215 |
+
"no_speech_threshold": 0.3, # Much lower threshold to catch more speech (default is 0.6)
|
| 1216 |
"temperature": 0.0, # Deterministic output
|
| 1217 |
"best_of": 1, # Use single best result
|
| 1218 |
+
"beam_size": 5, # Beam search size for better accuracy
|
| 1219 |
}
|
| 1220 |
+
|
| 1221 |
+
# Handle translation and language options
|
| 1222 |
if translate_to_english:
|
| 1223 |
+
# TRANSLATE mode: convert any language to English
|
| 1224 |
transcribe_options["task"] = "translate"
|
| 1225 |
+
# Optionally specify source language for better accuracy
|
| 1226 |
+
if selected_language:
|
| 1227 |
transcribe_options["language"] = selected_language
|
| 1228 |
+
# Note: Output will always be English regardless of input language
|
| 1229 |
else:
|
| 1230 |
+
# TRANSCRIBE mode: output in the same language as input
|
| 1231 |
if selected_language:
|
| 1232 |
transcribe_options["language"] = selected_language
|
| 1233 |
|
|
|
|
| 1240 |
# Verify we got full transcription
|
| 1241 |
total_segments = len(result.get("segments", []))
|
| 1242 |
transcription_text = result.get("text", "").strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1243 |
transcription_length = len(transcription_text)
|
| 1244 |
|
| 1245 |
if total_segments == 0:
|
|
|
|
| 1374 |
# Display results
|
| 1375 |
# Main transcription text
|
| 1376 |
st.subheader("📝 Transcription")
|
|
|
|
|
|
|
| 1377 |
transcription_text = result.get("text", "").strip()
|
| 1378 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1379 |
# Show transcription statistics
|
| 1380 |
total_segments = len(result.get("segments", []))
|
| 1381 |
if total_segments > 0:
|
|
|
|
| 1778 |
"ur": "Urdu"
|
| 1779 |
}
|
| 1780 |
language_display = language_names.get(detected_language, detected_language.upper())
|
| 1781 |
+
|
| 1782 |
+
if translate_to_english:
|
| 1783 |
+
if selected_language:
|
| 1784 |
+
st.info(f"🌍 Source: {language_names.get(selected_language, selected_language.upper())} (Detected: {language_display}) → Translated to English ✓")
|
| 1785 |
+
else:
|
| 1786 |
+
st.info(f"🌍 Detected Language: {language_display} → Translated to English ✓")
|
| 1787 |
else:
|
| 1788 |
+
if selected_language:
|
| 1789 |
+
st.info(f"Specified Language: {language_names.get(selected_language, selected_language.upper())} | Detected: {language_display}")
|
| 1790 |
+
else:
|
| 1791 |
+
st.info(f"Detected Language: {language_display}")
|
| 1792 |
|
| 1793 |
if aligned_segments:
|
| 1794 |
unique_speakers = set(seg["speaker"] for seg in aligned_segments)
|