Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -17,22 +17,9 @@ import re
|
|
| 17 |
|
| 18 |
# Download NLTK data during initialization
|
| 19 |
try:
|
|
|
|
|
|
|
| 20 |
nltk.download('punkt', quiet=True)
|
| 21 |
-
except Exception as e:
|
| 22 |
-
print(f"Warning: Failed to download NLTK data: {str(e)}")
|
| 23 |
-
print("Downloading from alternative source...")
|
| 24 |
-
try:
|
| 25 |
-
import ssl
|
| 26 |
-
try:
|
| 27 |
-
_create_unverified_https_context = ssl._create_unverified_context
|
| 28 |
-
except AttributeError:
|
| 29 |
-
pass
|
| 30 |
-
else:
|
| 31 |
-
ssl._create_default_https_context = _create_unverified_https_context
|
| 32 |
-
nltk.download('punkt', quiet=True)
|
| 33 |
-
except Exception as e:
|
| 34 |
-
print(f"Critical error downloading NLTK data: {str(e)}")
|
| 35 |
-
raise
|
| 36 |
|
| 37 |
# Configure logging
|
| 38 |
logging.basicConfig(
|
|
@@ -160,7 +147,7 @@ class TTSDatasetCollector:
|
|
| 160 |
|
| 161 |
try:
|
| 162 |
# Try NLTK first
|
| 163 |
-
self.sentences = nltk.sent_tokenize(text.strip()
|
| 164 |
except Exception as e:
|
| 165 |
logger.warning(f"NLTK tokenization failed, falling back to simple splitting: {str(e)}")
|
| 166 |
# Fallback to simple splitting
|
|
@@ -207,7 +194,7 @@ class TTSDatasetCollector:
|
|
| 207 |
|
| 208 |
def get_styled_text(self, text: str) -> str:
|
| 209 |
"""Get text with current font styling"""
|
| 210 |
-
font_css = FONT_STYLES
|
| 211 |
return f"<div style='{font_css}'>{text}</div>"
|
| 212 |
|
| 213 |
def set_font(self, font_style: str) -> Tuple[bool, str]:
|
|
@@ -532,18 +519,28 @@ def create_interface():
|
|
| 532 |
# Add font-face declarations
|
| 533 |
font_face_css = ""
|
| 534 |
for font_style, font_info in FONT_STYLES.items():
|
| 535 |
-
|
| 536 |
-
|
|
|
|
| 537 |
font_face_css += f"""
|
| 538 |
@font-face {{
|
| 539 |
font-family: '{font_info["family"]}';
|
| 540 |
-
src: url('
|
| 541 |
}}
|
| 542 |
"""
|
|
|
|
|
|
|
|
|
|
| 543 |
|
| 544 |
custom_css += font_face_css
|
| 545 |
|
| 546 |
with gr.Blocks(title="TTS Dataset Collection Tool", css=custom_css) as interface:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 547 |
gr.Markdown("# TTS Dataset Collection Tool")
|
| 548 |
|
| 549 |
with gr.Row():
|
|
@@ -571,7 +568,7 @@ def create_interface():
|
|
| 571 |
elem_classes=["small-input"]
|
| 572 |
)
|
| 573 |
font_select = gr.Dropdown(
|
| 574 |
-
choices=list(FONT_STYLES.keys()),
|
| 575 |
value="english_serif",
|
| 576 |
label="Select Font Style",
|
| 577 |
elem_classes=["small-input"]
|
|
@@ -584,12 +581,6 @@ def create_interface():
|
|
| 584 |
)
|
| 585 |
add_font_btn = gr.Button("Add Custom Font")
|
| 586 |
|
| 587 |
-
status = gr.Textbox(
|
| 588 |
-
label="Status",
|
| 589 |
-
interactive=False,
|
| 590 |
-
max_lines=3
|
| 591 |
-
)
|
| 592 |
-
|
| 593 |
# Dataset Info
|
| 594 |
with gr.Accordion("Dataset Statistics", open=False):
|
| 595 |
dataset_info = gr.JSON(
|
|
@@ -757,8 +748,23 @@ def create_interface():
|
|
| 757 |
if not success:
|
| 758 |
return {status: f"❌ {msg}"}
|
| 759 |
# Update font dropdown
|
| 760 |
-
font_choices = list(FONT_STYLES.keys())
|
| 761 |
font_select.update(choices=font_choices)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 762 |
return {status: f"✅ {msg}"}
|
| 763 |
|
| 764 |
# Event handlers
|
|
|
|
| 17 |
|
| 18 |
# Download NLTK data during initialization
|
| 19 |
try:
|
| 20 |
+
nltk.data.find('tokenizers/punkt')
|
| 21 |
+
except LookupError:
|
| 22 |
nltk.download('punkt', quiet=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
# Configure logging
|
| 25 |
logging.basicConfig(
|
|
|
|
| 147 |
|
| 148 |
try:
|
| 149 |
# Try NLTK first
|
| 150 |
+
self.sentences = nltk.sent_tokenize(text.strip())
|
| 151 |
except Exception as e:
|
| 152 |
logger.warning(f"NLTK tokenization failed, falling back to simple splitting: {str(e)}")
|
| 153 |
# Fallback to simple splitting
|
|
|
|
| 194 |
|
| 195 |
def get_styled_text(self, text: str) -> str:
|
| 196 |
"""Get text with current font styling"""
|
| 197 |
+
font_css = FONT_STYLES.get(self.current_font, {}).get('css', '')
|
| 198 |
return f"<div style='{font_css}'>{text}</div>"
|
| 199 |
|
| 200 |
def set_font(self, font_style: str) -> Tuple[bool, str]:
|
|
|
|
| 519 |
# Add font-face declarations
|
| 520 |
font_face_css = ""
|
| 521 |
for font_style, font_info in FONT_STYLES.items():
|
| 522 |
+
font_file_name = font_info['family'] + '.ttf'
|
| 523 |
+
font_path = collector.fonts_path / font_file_name
|
| 524 |
+
if os.path.exists(font_path):
|
| 525 |
font_face_css += f"""
|
| 526 |
@font-face {{
|
| 527 |
font-family: '{font_info["family"]}';
|
| 528 |
+
src: url('file/{font_path}') format('truetype');
|
| 529 |
}}
|
| 530 |
"""
|
| 531 |
+
else:
|
| 532 |
+
# For system fonts like 'Arial' and 'Times New Roman', no need to specify src
|
| 533 |
+
pass
|
| 534 |
|
| 535 |
custom_css += font_face_css
|
| 536 |
|
| 537 |
with gr.Blocks(title="TTS Dataset Collection Tool", css=custom_css) as interface:
|
| 538 |
+
status = gr.Textbox(
|
| 539 |
+
label="Status",
|
| 540 |
+
interactive=False,
|
| 541 |
+
max_lines=3
|
| 542 |
+
)
|
| 543 |
+
|
| 544 |
gr.Markdown("# TTS Dataset Collection Tool")
|
| 545 |
|
| 546 |
with gr.Row():
|
|
|
|
| 568 |
elem_classes=["small-input"]
|
| 569 |
)
|
| 570 |
font_select = gr.Dropdown(
|
| 571 |
+
choices=list(FONT_STYLES.keys()) + list(collector.custom_fonts.keys()),
|
| 572 |
value="english_serif",
|
| 573 |
label="Select Font Style",
|
| 574 |
elem_classes=["small-input"]
|
|
|
|
| 581 |
)
|
| 582 |
add_font_btn = gr.Button("Add Custom Font")
|
| 583 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 584 |
# Dataset Info
|
| 585 |
with gr.Accordion("Dataset Statistics", open=False):
|
| 586 |
dataset_info = gr.JSON(
|
|
|
|
| 748 |
if not success:
|
| 749 |
return {status: f"❌ {msg}"}
|
| 750 |
# Update font dropdown
|
| 751 |
+
font_choices = list(FONT_STYLES.keys()) + list(collector.custom_fonts.keys())
|
| 752 |
font_select.update(choices=font_choices)
|
| 753 |
+
# Rebuild CSS to include new font
|
| 754 |
+
font_face_css = ""
|
| 755 |
+
for font_style, font_info in FONT_STYLES.items():
|
| 756 |
+
if font_style in collector.custom_fonts:
|
| 757 |
+
font_file_name = font_info['family'] + '.ttf'
|
| 758 |
+
font_path = collector.fonts_path / font_file_name
|
| 759 |
+
if os.path.exists(font_path):
|
| 760 |
+
font_face_css += f"""
|
| 761 |
+
@font-face {{
|
| 762 |
+
font-family: '{font_info["family"]}';
|
| 763 |
+
src: url('file/{font_path}') format('truetype');
|
| 764 |
+
}}
|
| 765 |
+
"""
|
| 766 |
+
# Update the interface CSS
|
| 767 |
+
interface.set_css(custom_css + font_face_css)
|
| 768 |
return {status: f"✅ {msg}"}
|
| 769 |
|
| 770 |
# Event handlers
|