# Hugging Face Hub page header (not part of the program):
# abdulshakur - "Upload folder using huggingface_hub" - commit 4d338c7 (verified)
"""
YouTube Tutorial to Step-by-Step Guide Generator
Main application file for Hugging Face Space deployment
"""
import os
import logging
import time
import tempfile
from typing import Dict, List, Optional, Any
import gradio as gr
import numpy as np
from huggingface_hub import HfApi, login
from dotenv import load_dotenv
# Import custom modules
from smolagent_processor import SmoLAgentProcessor
import ui_components
import youtube_utils
import memory_utils
# Configure root logging once for the whole application.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Load environment variables (via python-dotenv) before reading HF_TOKEN.
load_dotenv()

# Authenticate against the Hugging Face Hub when a token is configured;
# without one the app still starts, it only logs a warning.
HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
    logger.warning("No Hugging Face token found. Some features may be limited.")
else:
    login(token=HF_TOKEN)
    logger.info("Logged in to Hugging Face Hub")
# Main application functions
def process_video(video_url: str, progress=gr.Progress()) -> Dict[str, Any]:
    """
    Run the full pipeline for one YouTube URL and collect the guide data.

    The stages are: resolve the video ID, fetch video information, fetch the
    transcript, fetch the chapters, then hand transcript and chapters to a
    ``SmoLAgentProcessor`` to obtain the steps. Progress is reported through
    ``progress`` before each stage and memory usage is logged around the
    processing stage.

    Args:
        video_url: YouTube video URL
        progress: Gradio progress indicator, called with a fraction and a
            status message

    Returns:
        On success, a dictionary with the keys ``"video_info"``,
        ``"chapters"``, ``"steps"``, ``"memory_usage"`` and
        ``"processing_time"`` (seconds). If the URL yields no video ID, or
        the video information contains an ``"error"`` entry, a dictionary
        with the single key ``"error"`` is returned instead.
    """
    logger.info(f"Processing video: {video_url}")
    started_at = time.time()

    # Stage 0: resolve the video ID; bail out early on an unusable URL.
    video_id = youtube_utils.extract_video_id(video_url)
    if not video_id:
        return {"error": "Invalid YouTube URL. Please provide a valid YouTube video URL."}

    # Stage 1: video information (errors are forwarded unchanged).
    progress(0.1, "Extracting video information...")
    video_info = youtube_utils.get_video_info(video_id)
    if "error" in video_info:
        return {"error": video_info["error"]}
    video_info["id"] = video_id

    # Stage 2: transcript.
    progress(0.2, "Getting video transcript...")
    transcript = youtube_utils.get_transcript(video_id)

    # Stage 3: chapters.
    progress(0.3, "Getting video chapters...")
    chapters = youtube_utils.get_video_chapters(video_id)

    # Stage 4: step extraction, bracketed by memory-usage log entries.
    progress(0.4, "Processing transcript...")
    agent = SmoLAgentProcessor()
    memory_utils.log_memory_usage()
    guide_steps = agent.process_transcript(transcript, chapters)

    progress(0.9, "Finalizing results...")
    memory_utils.log_memory_usage()

    processing_time = time.time() - started_at
    logger.info(f"Processing completed in {processing_time:.2f} seconds")

    result = {
        "video_info": video_info,
        "chapters": chapters,
        "steps": guide_steps,
        "memory_usage": memory_utils.get_memory_usage(),
        "processing_time": processing_time
    }
    return result
def create_interface() -> gr.Blocks:
    """
    Create Gradio interface for the application.

    Layout:
        * A header row with the URL textbox, the "Generate Guide" button, a
          collapsed "Advanced Options" accordion showing memory usage, and a
          side column with static help text.
        * A "Guide" tab with Markdown panes for video info, chapters and
          steps, plus a Markdown export button and an initially hidden
          export textbox.
        * An "Edit" tab with an editable dataframe of steps and an
          "Update Guide" button.

    Returns:
        Gradio Blocks interface
    """
    with gr.Blocks(css=ui_components.CUSTOM_CSS) as app:
        gr.Markdown("# YouTube Tutorial to Step-by-Step Guide")
        gr.Markdown("Convert any YouTube tutorial into an editable, time-stamped guide with code detection.")

        with gr.Row():
            with gr.Column(scale=3):
                video_url = gr.Textbox(
                    label="YouTube Video URL",
                    placeholder="https://www.youtube.com/watch?v=...",
                    info="Enter the URL of a YouTube tutorial video"
                )
                submit_btn = gr.Button("Generate Guide", variant="primary")
                with gr.Accordion("Advanced Options", open=False):
                    memory_info = gr.Markdown(
                        ui_components.format_memory_usage(memory_utils.get_memory_usage())
                    )
            with gr.Column(scale=1):
                # Built by implicit concatenation so the text does not depend
                # on how this block is indented.
                gr.Markdown(
                    "\n"
                    "## How it works\n"
                    "1. Enter a YouTube tutorial URL\n"
                    "2. The app extracts the transcript and detects chapters\n"
                    "3. It processes the content to identify steps and code snippets\n"
                    "4. You get an editable guide with timestamps\n"
                    "## Features\n"
                    "- Automatic chapter detection\n"
                    "- Code snippet identification\n"
                    "- Editable steps and code\n"
                    "- Export to Markdown\n"
                )

        with gr.Tabs():
            with gr.TabItem("Guide"):
                with gr.Row():
                    with gr.Column(scale=1):
                        video_info_md = gr.Markdown("Enter a YouTube URL and click 'Generate Guide'")
                    with gr.Column(scale=1):
                        chapters_md = gr.Markdown("")
                steps_md = gr.Markdown("")
                with gr.Row():
                    export_md_btn = gr.Button("Export to Markdown")
                    export_md = gr.Textbox(
                        label="Markdown Export",
                        visible=False,
                        lines=10
                    )
            with gr.TabItem("Edit"):
                steps_df = gr.Dataframe(
                    headers=["Timestamp", "Text", "Is Code", "Code Language", "Code Content"],
                    datatype=["str", "str", "bool", "str", "str"],
                    col_count=(5, "fixed"),
                    interactive=True
                )
                update_steps_btn = gr.Button("Update Guide")

        # Event handlers

        # NOTE(review): process_video returns ONE dict keyed by strings
        # ("video_info", "chapters", "steps", ...), while five output
        # components are listed here. Gradio expects one value per output or
        # a dict keyed by the component objects, so this wiring most likely
        # needs an adapter that renders each part - TODO confirm and fix once
        # the ui_components formatting helpers are known.
        submit_btn.click(
            fn=process_video,
            inputs=[video_url],
            outputs=[video_info_md, chapters_md, steps_md, steps_df, memory_info]
        )

        # Step 1 fills the export textbox with the generated Markdown;
        # step 2 reveals it. The reveal must be a component update: returning
        # a bare True would be written into the textbox as its value
        # (clobbering the export) and would leave the textbox hidden.
        export_md_btn.click(
            fn=lambda steps, video_info: ui_components.create_export_markdown(steps, video_info),
            inputs=[steps_md, video_info_md],
            outputs=[export_md]
        ).then(
            fn=lambda: gr.update(visible=True),
            outputs=[export_md]
        )

        # NOTE(review): this handler returns a string-keyed dict for a single
        # Markdown output; as with submit_btn above, Gradio cannot map that
        # onto steps_md. It presumably should render the edited steps to
        # Markdown - TODO confirm which ui_components helper does that.
        update_steps_btn.click(
            fn=lambda df_data, video_info: {
                "steps": ui_components.dataframe_to_steps(df_data),
                "video_info": video_info
            },
            inputs=[steps_df, video_info_md],
            outputs=[steps_md]
        )

        # Custom JavaScript for embedding YouTube player.
        # NOTE(review): `_js` is the Gradio 3.x keyword (renamed to `js` in
        # Gradio 4) - verify against the Space's pinned Gradio version.
        app.load(lambda: None, None, None, _js=ui_components.YOUTUBE_EMBED_JS)

    return app
# Launch the application
if __name__ == "__main__":
    # Build the Blocks UI and launch it with Gradio's default settings.
    app = create_interface()
    app.launch()