youtube_tutorial / app.py

Upload folder using huggingface_hub

4d338c7 verified 10 months ago

6.79 kB

	"""
	YouTube Tutorial to Step-by-Step Guide Generator
	Main application file for Hugging Face Space deployment
	"""
	import os
	import logging
	import time
	import tempfile
	from typing import Dict, List, Optional, Any

	import gradio as gr
	import numpy as np
	from huggingface_hub import HfApi, login
	from dotenv import load_dotenv

	# Import custom modules
	from smolagent_processor import SmoLAgentProcessor
	import ui_components
	import youtube_utils
	import memory_utils

	# Logging: INFO level; each record carries timestamp, logger name and level
	logging.basicConfig(
	    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
	    level=logging.INFO,
	)
	logger = logging.getLogger(__name__)

	# Load environment variables (python-dotenv) before reading HF_TOKEN below
	load_dotenv()

	# Authenticate with the Hugging Face Hub only when a token is configured;
	# without one the app still starts and just logs a warning.
	HF_TOKEN = os.environ.get("HF_TOKEN")
	if not HF_TOKEN:
	    logger.warning("No Hugging Face token found. Some features may be limited.")
	else:
	    login(token=HF_TOKEN)
	    logger.info("Logged in to Hugging Face Hub")

	# Main application functions
	def process_video(video_url: str, progress=gr.Progress()) -> Dict[str, Any]:
	    """
	    Turn a YouTube tutorial URL into structured step-by-step guide data.

	    The pipeline is: extract the video ID, fetch video info, fetch the
	    transcript and chapters, then hand both to a SmoLAgentProcessor to
	    derive the steps. Progress is reported through ``progress`` along the
	    way and memory usage is logged before and after the processing call.

	    Args:
	        video_url: YouTube video URL
	        progress: Gradio progress indicator, called as
	            ``progress(fraction, message)``

	    Returns:
	        On success, a dictionary with the keys ``video_info`` (including the
	        extracted ``id``), ``chapters``, ``steps``, ``memory_usage`` and
	        ``processing_time`` (seconds). If no video ID can be extracted, or
	        the video info lookup contains an ``"error"`` entry, a dictionary
	        with the single key ``error`` is returned instead.
	    """
	    logger.info(f"Processing video: {video_url}")
	    started_at = time.time()

	    # Guard: bail out early when the URL does not yield a video ID
	    vid = youtube_utils.extract_video_id(video_url)
	    if not vid:
	        return {"error": "Invalid YouTube URL. Please provide a valid YouTube video URL."}

	    progress(0.1, "Extracting video information...")

	    # Guard: propagate an error reported by the info lookup
	    info = youtube_utils.get_video_info(vid)
	    if "error" in info:
	        return {"error": info["error"]}

	    # Keep the ID alongside the rest of the video metadata
	    info["id"] = vid

	    progress(0.2, "Getting video transcript...")
	    transcript_data = youtube_utils.get_transcript(vid)

	    progress(0.3, "Getting video chapters...")
	    chapter_list = youtube_utils.get_video_chapters(vid)

	    progress(0.4, "Processing transcript...")
	    agent = SmoLAgentProcessor()

	    # Memory is logged on both sides of the processing call
	    memory_utils.log_memory_usage()
	    guide_steps = agent.process_transcript(transcript_data, chapter_list)

	    progress(0.9, "Finalizing results...")
	    memory_utils.log_memory_usage()

	    elapsed = time.time() - started_at
	    logger.info(f"Processing completed in {elapsed:.2f} seconds")

	    # Assemble the payload in the same key order callers have always seen
	    result: Dict[str, Any] = {
	        "video_info": info,
	        "chapters": chapter_list,
	        "steps": guide_steps,
	    }
	    result["memory_usage"] = memory_utils.get_memory_usage()
	    result["processing_time"] = elapsed
	    return result

	def create_interface() -> gr.Blocks:
	    """
	    Create Gradio interface for the application.

	    Layout: a URL input with a submit button and a collapsed "Advanced
	    Options" accordion showing memory usage, a static "How it works" panel,
	    and two tabs - "Guide" (rendered video info, chapters, steps and a
	    Markdown export box) and "Edit" (an editable dataframe of steps).

	    Returns:
	        Gradio Blocks interface (built but not launched)
	    """

	    def _export_markdown(steps, video_info):
	        """Build the Markdown export and reveal the initially hidden export box."""
	        markdown = ui_components.create_export_markdown(steps, video_info)
	        # A plain return value only sets the textbox *value*; changing the
	        # `visible` property requires an update payload. The previous
	        # follow-up step returned True, which overwrote the exported text
	        # and left the box hidden.
	        return gr.update(value=markdown, visible=True)

	    with gr.Blocks(css=ui_components.CUSTOM_CSS) as app:
	        gr.Markdown("# YouTube Tutorial to Step-by-Step Guide")
	        gr.Markdown("Convert any YouTube tutorial into an editable, time-stamped guide with code detection.")

	        with gr.Row():
	            with gr.Column(scale=3):
	                video_url = gr.Textbox(
	                    label="YouTube Video URL",
	                    placeholder="https://www.youtube.com/watch?v=...",
	                    info="Enter the URL of a YouTube tutorial video"
	                )

	                submit_btn = gr.Button("Generate Guide", variant="primary")

	                with gr.Accordion("Advanced Options", open=False):
	                    # Seeded with the memory usage at build time; also listed
	                    # as an output of the submit handler below.
	                    memory_info = gr.Markdown(ui_components.format_memory_usage(memory_utils.get_memory_usage()))

	            with gr.Column(scale=1):
	                gr.Markdown("""
	## How it works
	1. Enter a YouTube tutorial URL
	2. The app extracts the transcript and detects chapters
	3. It processes the content to identify steps and code snippets
	4. You get an editable guide with timestamps

	## Features
	- Automatic chapter detection
	- Code snippet identification
	- Editable steps and code
	- Export to Markdown
	""")

	        with gr.Tabs():
	            with gr.TabItem("Guide"):
	                with gr.Row():
	                    with gr.Column(scale=1):
	                        video_info_md = gr.Markdown("Enter a YouTube URL and click 'Generate Guide'")

	                    with gr.Column(scale=1):
	                        chapters_md = gr.Markdown("")

	                steps_md = gr.Markdown("")

	                with gr.Row():
	                    export_md_btn = gr.Button("Export to Markdown")
	                    # Hidden until the first export; revealed by _export_markdown
	                    export_md = gr.Textbox(
	                        label="Markdown Export",
	                        visible=False,
	                        lines=10
	                    )

	            with gr.TabItem("Edit"):
	                steps_df = gr.Dataframe(
	                    headers=["Timestamp", "Text", "Is Code", "Code Language", "Code Content"],
	                    datatype=["str", "str", "bool", "str", "str"],
	                    col_count=(5, "fixed"),
	                    interactive=True
	                )

	                update_steps_btn = gr.Button("Update Guide")

	        # Event handlers

	        # NOTE(review): process_video returns ONE dict keyed by strings
	        # ("video_info", "chapters", "steps", ...), but five output components
	        # are listed here. Gradio expects one value per output (or a dict keyed
	        # by the components themselves), so an adapter that formats the result
	        # for each component is needed. It is not added here because the
	        # required ui_components formatters are not visible from this file.
	        submit_btn.click(
	            fn=process_video,
	            inputs=[video_url],
	            outputs=[video_info_md, chapters_md, steps_md, steps_df, memory_info]
	        )

	        # NOTE(review): the inputs are the *rendered* Markdown components, so
	        # create_export_markdown receives their string values - confirm that
	        # is what it expects rather than the structured steps / video info.
	        export_md_btn.click(
	            fn=_export_markdown,
	            inputs=[steps_md, video_info_md],
	            outputs=[export_md]
	        )

	        # NOTE(review): this handler returns a string-keyed dict into a single
	        # Markdown output; it presumably needs to be rendered to Markdown
	        # text first - verify against the ui_components API.
	        update_steps_btn.click(
	            fn=lambda df_data, video_info: {
	                "steps": ui_components.dataframe_to_steps(df_data),
	                "video_info": video_info
	            },
	            inputs=[steps_df, video_info_md],
	            outputs=[steps_md]
	        )

	        # Custom JavaScript for embedding YouTube player
	        # NOTE(review): `_js` is the Gradio 3.x keyword; Gradio 4+ names it
	        # `js` - confirm against the pinned Gradio version.
	        app.load(lambda: None, None, None, _js=ui_components.YOUTUBE_EMBED_JS)

	    return app

	# Script entry point: build the interface and start serving it
	if __name__ == "__main__":
	    app = create_interface()
	    app.launch()