import html
import re
import traceback

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

# ============================================================================
# CONFIGURATION
# ============================================================================

# Identifier handed to `from_pretrained` for both the tokenizer and the model.
MODEL_NAME = "armaniii/Qwen3-4B-populism"  # Update with your HuggingFace username

# Prompts from helpers.py
# The system prompt is three paragraphs, each terminated by a blank line.
SYSTEM_PROMPT = '\n\n'.join((
    'You are a helpful AI assistant with expertise in identifying populism in public discourse.',
    'Populism can be defined as an anti-elite discourse in the name of the "people". '
    'In other words, populism emphasizes the idea of the common "people" and '
    'often positions this group in opposition to a perceived elite group.',
    'There are two core elements in identifying populism: '
    '(i) anti-elitism, i.e., negative invocations of "elites", and '
    '(ii) people-centrism, i.e., positive invocations of the "people".',
)) + '\n\n'

# Header line, then one line per category, then a trailing blank line.
INSTRUCTION = (
    'You must classify each sentence in one of the following categories:\n\n'
    + '\n'.join((
        '(a) No populism.',
        '(b) Anti-elitism, i.e., negative invocations of "elites".',
        '(c) People-centrism, i.e., positive invocations of the "People".',
        '(d) Both people-centrism and anti-elitism populism.',
    ))
    + '\n\n'
)

# `{}` is filled with the sentence under analysis.
QUERY = 'Which is the most relevant category for the sentence: "{}"?'
# Sentence opener that is prepended to the model output before label extraction.
RESPONSE_START = 'I would categorize this sentence as ('

# Category letter -> display name (leading emoji doubles as the card icon).
LABEL_MAP = dict(
    a='🔵 No Populism',
    b='🟡 Anti-Elitism',
    c='🟢 People-Centrism',
    d='🔴 Both (Anti-Elitism + People-Centrism)',
)

# Category letter -> one-sentence explanation shown under the result.
LABEL_DESCRIPTIONS = dict(
    a='The sentence does not contain populist rhetoric.',
    b='The sentence contains negative references to elites or elite groups.',
    c='The sentence contains positive references to "the people" or common citizens.',
    d='The sentence combines both anti-elite rhetoric and people-centric language.',
)

# ============================================================================
# MODEL LOADING
# ============================================================================

print("Loading model and tokenizer...")

# Pick the GPU when CUDA reports as available, otherwise stay on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# float16 on GPU (more compatible than bfloat16), float32 on CPU.
# NOTE(review): torch_dtype is computed here but is not passed to
# from_pretrained below — confirm whether that is intentional.
torch_dtype = torch.float16 if device == "cuda" else torch.float32

# Tokenizer and model both come from the same MODEL_NAME checkpoint.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# The checkpoint is expected to be quantized (bitsandbytes handles it).
# NOTE(review): trust_remote_code=True is set — confirm that the checkpoint
# actually requires it and that the repository is trusted.
model_kwargs = {
    "device_map": "auto",
    "low_cpu_mem_usage": True,
    "trust_remote_code": True,
}
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, **model_kwargs)
del model_kwargs  # keep the module namespace identical to before

# No explicit device is given to the pipeline because device_map="auto"
# was already used when loading the model.
text_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)

print("Model loaded successfully!")

# ============================================================================
# INFERENCE FUNCTION
# ============================================================================

def extract_label(response_text):
    """Extract the category letter ('a', 'b', 'c' or 'd') from a model response.

    Resolution order:
      1. The first explicit parenthesised letter, e.g. "(b)" (case-insensitive).
      2. An explicit "no populism" statement -> 'a'. This is checked before the
         other keywords because such explanations often go on to name the
         categories that are *absent* ("no populism: lacks anti-elitism ...").
      3. Keyword fallbacks for 'd', 'b' and 'c'.
      4. 'a' when nothing matches.

    Args:
        response_text: Text produced by the model; may be empty or None.

    Returns:
        A single-character label, one of 'a', 'b', 'c', 'd'.
    """
    # Nothing to inspect: fall back to the "no populism" default.
    if not response_text:
        return 'a'

    text_lower = response_text.lower()

    # Look for patterns like "(a)", "(b)", "(c)", "(d)"
    match = re.search(r'\(([abcd])\)', text_lower)
    if match:
        return match.group(1)

    # Fallback: look for the label words. An explicit negative wins first.
    if 'no populism' in text_lower:
        return 'a'
    if 'both' in text_lower or 'anti-elitism and people-centrism' in text_lower:
        return 'd'
    if 'anti-elitism' in text_lower or 'anti-elite' in text_lower:
        return 'b'
    if 'people-centrism' in text_lower or 'people-centric' in text_lower:
        return 'c'

    return 'a'  # Default to no populism if unclear


# Per-label colour scheme and short display name used by the result card.
_RESULT_COLORS = {
    'a': {'bg': '#e3f2fd', 'text': '#0d47a1', 'border': '#1976d2', 'name': 'No Populism'},
    'b': {'bg': '#fff9c4', 'text': '#e65100', 'border': '#f57f17', 'name': 'Anti-Elitism'},
    'c': {'bg': '#e8f5e9', 'text': '#1b5e20', 'border': '#388e3c', 'name': 'People-Centrism'},
    'd': {'bg': '#ffebee', 'text': '#b71c1c', 'border': '#d32f2f', 'name': 'Both'}
}


def _render_result_card(label, label_name, label_desc):
    """Return the HTML result card (dark text on light background) for a label.

    Unknown labels fall back to the colour scheme of 'a'. The first
    whitespace-separated token of `label_name` (the emoji) is used as the icon.
    """
    colors = _RESULT_COLORS.get(label, _RESULT_COLORS['a'])
    return f"""
<div style="border: 2px solid {colors['border']}; border-radius: 16px; overflow: hidden; box-shadow: 0 8px 16px rgba(0,0,0,0.15); margin: 20px 0; background-color: white;">
    <div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; padding: 20px; text-align: center;">
        <h2 style="margin: 0; font-size: 1.4em; font-weight: 600;">🎯 Classification Result</h2>
    </div>
    <div style="background-color: {colors['bg']}; padding: 40px 30px; text-align: center; border-left: 8px solid {colors['border']};">
        <div style="font-size: 3em; margin-bottom: 10px;">{label_name.split()[0]}</div>
        <h1 style="margin: 0; font-size: 2.2em; font-weight: bold; color: {colors['text']}; line-height: 1.3;">
            {colors['name']}
        </h1>
    </div>
    <div style="background-color: #f8f9fa; padding: 25px 30px; border-top: 2px solid #e0e0e0;">
        <p style="margin: 0; font-size: 1.15em; color: #333; line-height: 1.6;">
            <strong style="color: #000;">Description:</strong> {label_desc}
        </p>
    </div>
</div>
"""


def _render_error_card(message):
    """Return the HTML error card; `message` is HTML-escaped before embedding."""
    safe_message = html.escape(message)
    return f"""
<div style="border: 2px solid #f44336; border-radius: 12px; padding: 25px; background-color: #ffebee; margin: 20px 0;">
    <h3 style="color: #b71c1c; margin-top: 0; font-size: 1.5em;">⚠️ Error</h3>
    <p style="color: #333; font-size: 1.1em; line-height: 1.6;">An error occurred during classification. Please try again.</p>
    <p style="color: #333;"><strong style="color: #000;">Error details:</strong> {safe_message}</p>
    <p style="margin-bottom: 0; color: #666;">If the problem persists, please check the model configuration.</p>
</div>
"""


def classify_sentence(sentence, progress=gr.Progress()):
    """Classify a sentence for populist rhetoric, streaming status updates.

    This is a generator: every yielded item is a 3-tuple
    ``(status_markdown, result_html, reasoning_markdown)``. Intermediate
    yields carry only a status message; the last yield carries the final
    result card and reasoning, or an error card if anything failed.

    Args:
        sentence: The text to classify. Empty, whitespace-only or None input
            yields a single warning and stops.
        progress: Progress callback invoked as ``progress(fraction, desc=...)``.

    Yields:
        Tuples of three strings as described above.
    """
    # Guard the slice so a None sentence cannot raise before validation.
    preview = (sentence or "")[:100]
    print(f"\n{'='*80}")
    print(f"CLASSIFY REQUEST: {preview}...")
    print(f"{'='*80}")

    if not sentence or not sentence.strip():
        yield "⚠️ Please enter a sentence to classify.", "", ""
        return  # End generator

    try:
        # Step 1: Building prompt
        progress(0.1, desc="🔨 Building prompt...")
        print("Step 1: Building prompt...")
        yield "⏳ **Step 1/4:** Building prompt...", "", ""

        conversation = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": INSTRUCTION + '\n\n' + QUERY.format(sentence)}
        ]

        prompt = tokenizer.apply_chat_template(
            conversation=conversation,
            tokenize=False,
            add_generation_prompt=True,
            enable_thinking=False  # Disable thinking to match audit_llms.py behavior
        )
        print(f"Prompt length: {len(prompt)} chars")

        # Step 2: Running model inference
        progress(0.3, desc="🤖 Running model inference...")
        print("Step 2: Running model inference...")
        yield "⏳ **Step 2/4:** Running model inference... (this may take 10-30 seconds)", "", ""

        responses = text_pipeline(
            prompt,
            do_sample=False,  # greedy decoding
            num_return_sequences=1,
            return_full_text=False,  # only the newly generated continuation
            max_new_tokens=256,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.eos_token_id,
        )

        # Step 3: Processing response
        progress(0.8, desc="⚙️ Processing results...")
        print("Step 3: Processing results...")
        yield "⏳ **Step 3/4:** Processing results...", "", ""

        generated = responses[0]['generated_text']
        # NOTE(review): the prompt above is not primed with RESPONSE_START, so
        # the model may emit that opener itself — confirm the intended setup.
        # Only prepend the opener when it is not already there, so it is never
        # shown twice.
        if generated.lstrip().startswith(RESPONSE_START):
            response_text = generated.lstrip()
        else:
            response_text = RESPONSE_START + generated
        print(f"Response: {response_text[:200]}...")

        # Extract label
        label = extract_label(response_text)
        label_name = LABEL_MAP.get(label, '❓ Unknown')
        label_desc = LABEL_DESCRIPTIONS.get(label, '')
        print(f"Extracted label: {label} -> {label_name}")

        # Step 4: Formatting output
        progress(0.95, desc="📝 Formatting output...")
        print("Step 4: Formatting output...")

        result_html = _render_result_card(label, label_name, label_desc)

        reasoning_md = f"""
### Model's Detailed Reasoning

{response_text}

---

**Note:** This is the model's explanation for its classification decision. The reasoning shows how the model analyzed the sentence based on the populism detection criteria.
"""

        progress(1.0, desc="✅ Complete!")
        print(f"Classification complete! Label: {label_name}")
        print(f"{'='*80}\n")

        # Final result with success status - YIELD 3 outputs
        yield f"✅ **Classification Complete!** Result: {label_name}", result_html, reasoning_md

    except Exception as e:
        # Top-level UI boundary: log the full traceback and show an error card
        # instead of letting the exception propagate.
        print(f"Error in classify_sentence: {e}")
        traceback.print_exc()
        yield (
            f"❌ **Error:** {str(e)}",
            _render_error_card(str(e)),
            "No reasoning available due to error.",
        )


# ============================================================================
# GRADIO INTERFACE
# ============================================================================

# Example sentences: each entry becomes a one-element row, since there is a
# single input component.
examples = [
    [text]
    for text in (
        "We need to stand up against the corrupt establishment that has betrayed the American people.",
        "The policy aims to improve economic growth through targeted investments.",
        "It's time for ordinary Americans to take back control from the political elites.",
        "We must fight for the rights and interests of everyday working families.",
        "The meeting will be held next Tuesday at 3 PM.",
        "The elite media doesn't care about regular folks like us.",
        "Together, we the people can build a better future for our children.",
    )
]

# Stylesheet passed to gr.Blocks: page width, status banner, result box, footer.
custom_css = """
#component-0 {
    max-width: 1000px;
    margin: auto;
    padding: 20px;
}

/* Status box styling */
#status_box {
    padding: 15px !important;
    border-radius: 10px !important;
    margin: 15px 0 !important;
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
    color: white !important;
    font-weight: 500 !important;
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1) !important;
    min-height: 50px !important;
    text-align: center !important;
}

/* Result box container */
#output_box {
    padding: 0 !important;
    margin-top: 20px !important;
}

/* Make sure HTML renders properly */
#output_box .prose {
    max-width: 100% !important;
}

footer {
    display: none !important;
}
"""

# Build interface: one Blocks page holding, top to bottom, the intro text,
# the input row, a status line, the result card, the collapsible reasoning
# panel, the example list and the "about" footer text.
with gr.Blocks(css=custom_css, title="Populism Detector") as demo:
    # Page header and legend for the four categories
    gr.Markdown("""
    # 🎯 Populism Detection in Political Discourse

    This tool uses a fine-tuned language model to detect populist rhetoric in sentences.
    Populism is characterized by anti-elite discourse in the name of "the people."

    ### About the Categories

    - **🔵 No Populism**: The sentence does not contain populist elements
    - **🟡 Anti-Elitism**: Negative references to elites or powerful groups
    - **🟢 People-Centrism**: Positive references to "the people" or ordinary citizens
    - **🔴 Both**: Combines anti-elite rhetoric with people-centric language

    ---
    """)

    # Input area: free-text box plus the submit button
    with gr.Row():
        with gr.Column():
            input_text = gr.Textbox(
                label="Enter a sentence to analyze",
                placeholder="Type or paste a sentence here...",
                lines=3
            )
            submit_btn = gr.Button("🔍 Classify Sentence", variant="primary", size="lg")

    # Status indicator (first output of classify_sentence); styled via the
    # "#status_box" rules in custom_css
    status_box = gr.Markdown(
        value="",
        label="Status",
        visible=True,
        elem_id="status_box"
    )

    # Classification result card (always visible); second output of
    # classify_sentence
    result_card = gr.HTML(
        value="",
        label="Classification Result"
    )

    # Model reasoning (collapsible, closed by default); third output of
    # classify_sentence
    with gr.Accordion("📖 Show Model Reasoning", open=False) as reasoning_accordion:
        reasoning_text = gr.Markdown(
            value="",
            label=""
        )

    # Clickable example sentences that target the input box
    # NOTE(review): cache_examples=False and run_on_click is not set — confirm
    # against the Gradio Examples docs whether fn/outputs have any effect here.
    gr.Markdown("### 📝 Try these examples:")
    gr.Examples(
        examples=examples,
        inputs=input_text,
        outputs=[status_box, result_card, reasoning_text],
        fn=classify_sentence,
        cache_examples=False
    )

    # Button click handler with loading state; this is the only handler that
    # sets an explicit api_name ("classify")
    submit_btn.click(
        fn=classify_sentence,
        inputs=input_text,
        outputs=[status_box, result_card, reasoning_text],
        api_name="classify",
        show_progress="full"  # Show full progress bar with percentage
    )

    # Enter key handler: same function and outputs as the button, but without
    # an explicit api_name
    input_text.submit(
        fn=classify_sentence,
        inputs=input_text,
        outputs=[status_box, result_card, reasoning_text],
        show_progress="full"
    )

    # Closing "about" section with usage caveats
    gr.Markdown("""
    ---

    ### ℹ️ About This Model

    This model is fine-tuned on political speeches to identify populist rhetoric.
    It analyzes sentence-level discourse and classifies based on two key dimensions:

    1. **Anti-Elitism**: Language that criticizes or opposes elite groups
    2. **People-Centrism**: Language that emphasizes or champions "the people"

    **Note**: This tool is for research and educational purposes. Results should be
    interpreted in context and not used as definitive political classifications.

    Built with [Qwen3-4B](https://huggingface.co/Qwen/Qwen3-4B) fine-tuned on populist speech data.
    """)

# Launch the app only when this file is executed directly.
if __name__ == "__main__":
    # Enable the queue (for progress tracking), then start serving.
    demo.queue().launch()