"""YouTube Transcript Analyzer.

Gradio app (sized for free Hugging Face Spaces) that uses a smolagents
CodeAgent to pull key topics, concepts, steps/processes, and important
terms out of a chunk of transcript text.
"""

import ast
import re

import gradio as gr
from smolagents import CodeAgent, HfApiModel, tool


@tool
def analyze_transcript_segment(segment: str) -> dict:
    """
    Analyzes a segment of text to identify key topics, concepts, steps, and terms.

    Args:
        segment: A segment of the transcript text.
    """
    # This function doesn't need to do anything - the agent will use its model
    # to analyze the transcript when this tool is called.
    # Return a fresh dict each call so callers can never mutate shared state.
    return {
        "key_topics": [],
        "main_concepts": [],
        "steps_processes": [],
        "important_terms": [],
    }


def create_transcript_analyzer():
    """
    Creates and returns a transcript analyzer agent optimized for free Spaces.
    """
    # Use a reliable model suitable for free Spaces
    # Mistral-7B is well-supported on the Inference API
    model = HfApiModel(model_id="mistralai/Mistral-7B-Instruct-v0.2")

    # Create a simpler agent with just the analysis tool
    agent = CodeAgent(
        tools=[analyze_transcript_segment],
        model=model,
        max_steps=3,        # Limit steps to conserve resources
        verbosity_level=0,  # Reduce logging to conserve memory
    )
    return agent


def _result_from_text(text):
    """Fallback: scrape each of the four categories out of free-form model text.

    Used when the agent's reply is a string that cannot be parsed as a
    Python dict literal.
    """
    return {
        "key_topics": extract_section(text, "key_topics"),
        "main_concepts": extract_section(text, "main_concepts"),
        "steps_processes": extract_section(text, "steps_processes"),
        "important_terms": extract_section(text, "important_terms"),
    }


def _parse_agent_result(result):
    """Coerce the agent's reply (dict or str) into the four-category dict.

    Parsing strategy for strings, in order:
      1. the whole reply as a Python dict literal,
      2. a ``{...}`` span embedded in the reply,
      3. section-by-section text scraping via extract_section().
    """
    if isinstance(result, dict):
        return result

    if isinstance(result, str):
        try:
            # Try to parse the whole reply as a literal Python dictionary.
            parsed_result = ast.literal_eval(result)
            if isinstance(parsed_result, dict):
                return parsed_result
        except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
            # literal_eval's documented failure modes only — never a bare except.
            # Not a bare dict; try to extract one embedded in the text.
            dict_match = re.search(r'({.*})', result, re.DOTALL)
            if dict_match:
                try:
                    parsed_result = ast.literal_eval(dict_match.group(1))
                    if isinstance(parsed_result, dict):
                        return parsed_result
                except (ValueError, TypeError, SyntaxError,
                        MemoryError, RecursionError):
                    pass
        # If all parsing attempts fail, build a structured result manually.
        return _result_from_text(result)

    # Neither a dict nor a string — surface the failure in the UI shape.
    return {
        "key_topics": ["Error: Could not parse result"],
        "main_concepts": [],
        "steps_processes": [],
        "important_terms": [],
    }


def analyze_transcript(transcript_chunk):
    """
    Analyzes a chunk of transcript text.

    Args:
        transcript_chunk: The transcript text to analyze

    Returns:
        Dictionary with analysis results (keys: key_topics, main_concepts,
        steps_processes, important_terms; values: lists of strings).
        Errors are reported inside the dict rather than raised, so the
        Gradio UI always gets a renderable payload.
    """
    if not transcript_chunk or not transcript_chunk.strip():
        return {
            "key_topics": ["Please provide transcript text to analyze"],
            "main_concepts": [],
            "steps_processes": [],
            "important_terms": [],
        }

    # Check input length and truncate if necessary to avoid token limit errors
    max_chars = 1500  # Conservative limit to stay under token limits
    truncated = False
    if len(transcript_chunk) > max_chars:
        transcript_chunk = transcript_chunk[:max_chars]
        truncated = True

    # NOTE(review): a fresh agent per request is deliberate here — free Spaces
    # workers are memory-constrained; confirm before caching the agent.
    agent = create_transcript_analyzer()

    task = f"""
Analyze this transcript chunk carefully and identify:
1. Key topics discussed (3-5 main topics)
2. Main concepts explained (the core ideas being communicated)
3. Any steps or processes mentioned (numbered or sequential instructions)
4. Important terms or definitions (specialized vocabulary or key phrases)

IMPORTANT: Return ONLY a Python dictionary with these four categories. Each category should contain a list of strings. Use this exact format:
{{
    "key_topics": ["Topic 1", "Topic 2", "Topic 3"],
    "main_concepts": ["Concept 1", "Concept 2", "Concept 3"],
    "steps_processes": ["Step 1: Do this", "Step 2: Do that"],
    "important_terms": ["Term 1 - definition", "Term 2 - definition"]
}}

Transcript chunk to analyze:{" (truncated due to length)" if truncated else ""}
{transcript_chunk}
"""

    try:
        result = agent.run(task)
        return _parse_agent_result(result)
    except Exception as e:
        # Broad catch is intentional at this UI boundary: any inference or
        # parsing failure must come back as a displayable result, not a crash.
        return {
            "key_topics": [f"Error analyzing transcript: {str(e)}"],
            "main_concepts": [],
            "steps_processes": [],
            "important_terms": [],
        }


def extract_section(text, section_name):
    """Helper function to extract sections from text if parsing fails.

    Finds ``section_name:`` in free-form text and returns the bullet/numbered
    items that follow it, or the raw section text if no items are found.
    """
    # Section body runs until a blank line, the next "word:" header, or EOF.
    pattern = rf"{section_name}[:\s]+(.*?)(?:\n\n|\n[A-Za-z_]+:|$)"
    match = re.search(pattern, text, re.DOTALL | re.IGNORECASE)
    if match:
        # Split the section body on bullet markers (-, *, digits).
        items = re.findall(
            r'[-*\d]+\s+(.*?)(?:\n[-*\d]+|$)', match.group(1), re.DOTALL
        )
        if items:
            return [item.strip() for item in items]
        return [match.group(1).strip()]
    return ["Not found in analysis"]


# Create Gradio interface
def create_interface():
    """Build and return the Gradio Blocks UI (not launched here)."""
    with gr.Blocks(title="YouTube Transcript Analyzer") as demo:
        gr.Markdown("# YouTube Transcript Analyzer")
        gr.Markdown("Paste a segment of a YouTube transcript below to analyze it.")

        with gr.Row():
            with gr.Column():
                input_text = gr.Textbox(
                    label="Transcript Chunk",
                    placeholder="Paste your transcript text here... (Note: Very long texts will be automatically truncated)",
                    lines=10,
                    max_lines=15,
                )
                analyze_btn = gr.Button("Analyze Transcript")
            with gr.Column():
                # gr.JSON is the documented component name (gr.Json is the
                # deprecated alias).
                topics_output = gr.JSON(label="Analysis Results")

        analyze_btn.click(
            fn=analyze_transcript,
            inputs=input_text,
            outputs=topics_output,
        )

        gr.Markdown("""
## How to Use
1. Paste a segment of a YouTube transcript (or any transcript) in the text box
2. Click 'Analyze Transcript'
3. View the analyzed topics, concepts, steps, and important terms

## Limitations
- Works best with transcript chunks of 1-3 paragraphs
- Very long inputs will be automatically truncated to avoid token limit errors
- May take 10-20 seconds to analyze depending on server load
- Free version has limited capacity - please be patient
""")
    return demo


# Create at import time: Hugging Face Spaces looks for a module-level `demo`.
demo = create_interface()

# For Hugging Face Spaces
if __name__ == "__main__":
    demo.launch()